From 0bf4e3c34a6a799ccc34f403bed70119574ca9c8 Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 15 Dec 2015 12:04:22 +0100 Subject: [PATCH] - Dropped all vector code since new GPU's are all scalar, makes the code much easier - Some performance on low-end GPU may drop because of that, but only for a few hash-modes - Dropped scalar code (aka warp) since we do not have any vector datatypes anymore - Renamed C++ overloading functions memcat32_9 -> memcat_c32_w4x4_a3x4 - Still need to fix kernels to new function names, needs to be done manually - Temperature Management needs to be rewritten partially because of conflicting datatypes names - Added code to create different codepaths for NV on AMD in runtime in host (see data.vendor_id) - Added code to create different codepaths for NV on AMD in runtime in kernels (see IS_NV and IS_AMD) - First tests working for -m 0, for example - Great performance increases in general for NV so far - Tested amp_* and markov_* kernel - Migrated special NV optimizations for rule processor --- amd/amp_a0_v1.cl => OpenCL/amp_a0.cl | 15 +- amd/amp_a1_v1.cl => OpenCL/amp_a1.cl | 267 +- amd/amp_a3_v2.cl => OpenCL/amp_a3.cl | 12 +- .../check_multi_comp4.c | 2 +- .../check_multi_comp4_bs.c | 4 +- .../check_single_comp4.c | 2 +- OpenCL/check_single_comp4_bs.c | 3 + OpenCL/common.c | 7980 ++++++++ {amd => OpenCL}/gpu_aes256_amd.c | 0 {amd => OpenCL}/gpu_serpent256_amd.c | 0 {amd => OpenCL}/gpu_twofish256_amd.c | 0 {amd => OpenCL}/m00000_a0.cl | 103 +- {amd => OpenCL}/m00000_a1.cl | 110 +- {amd => OpenCL}/m00000_a3.cl | 119 +- {amd => OpenCL}/m00010_a0.cl | 98 +- {amd => OpenCL}/m00010_a1.cl | 104 +- {amd => OpenCL}/m00010_a3.cl | 119 +- {amd => OpenCL}/m00020_a0.cl | 114 +- {amd => OpenCL}/m00020_a1.cl | 120 +- {amd => OpenCL}/m00020_a3.cl | 144 +- {amd => OpenCL}/m00030_a0.cl | 110 +- {amd => OpenCL}/m00030_a1.cl | 116 +- {amd => OpenCL}/m00030_a3.cl | 119 +- {amd => OpenCL}/m00040_a0.cl | 110 +- {amd => OpenCL}/m00040_a1.cl | 116 +- {amd => 
OpenCL}/m00040_a3.cl | 140 +- {amd => OpenCL}/m00050_a0.cl | 153 +- {amd => OpenCL}/m00050_a1.cl | 159 +- {amd => OpenCL}/m00050_a3.cl | 183 +- {amd => OpenCL}/m00060_a0.cl | 153 +- {amd => OpenCL}/m00060_a1.cl | 159 +- {amd => OpenCL}/m00060_a3.cl | 183 +- {amd => OpenCL}/m00100_a0.cl | 166 +- {amd => OpenCL}/m00100_a1.cl | 172 +- {amd => OpenCL}/m00100_a3.cl | 229 +- {amd => OpenCL}/m00110_a0.cl | 166 +- {amd => OpenCL}/m00110_a1.cl | 172 +- {amd => OpenCL}/m00110_a3.cl | 229 +- {amd => OpenCL}/m00120_a0.cl | 114 +- {amd => OpenCL}/m00120_a1.cl | 120 +- {amd => OpenCL}/m00120_a3.cl | 170 +- {amd => OpenCL}/m00130_a0.cl | 114 +- {amd => OpenCL}/m00130_a1.cl | 120 +- {amd => OpenCL}/m00130_a3.cl | 229 +- {amd => OpenCL}/m00140_a0.cl | 114 +- {amd => OpenCL}/m00140_a1.cl | 120 +- {amd => OpenCL}/m00140_a3.cl | 170 +- {amd => OpenCL}/m00150_a0.cl | 156 +- {amd => OpenCL}/m00150_a1.cl | 162 +- {amd => OpenCL}/m00150_a3.cl | 186 +- {amd => OpenCL}/m00160_a0.cl | 156 +- {amd => OpenCL}/m00160_a1.cl | 162 +- {amd => OpenCL}/m00160_a3.cl | 186 +- {amd => OpenCL}/m00190_a0.cl | 186 +- {amd => OpenCL}/m00190_a1.cl | 192 +- {amd => OpenCL}/m00190_a3.cl | 249 +- {amd => OpenCL}/m00200_a0.cl | 102 +- {amd => OpenCL}/m00200_a1.cl | 92 +- {amd => OpenCL}/m00200_a3.cl | 101 +- {amd => OpenCL}/m00300_a0.cl | 166 +- {amd => OpenCL}/m00300_a1.cl | 172 +- {amd => OpenCL}/m00300_a3.cl | 293 +- {amd => OpenCL}/m00400.cl | 119 +- {amd => OpenCL}/m00500.cl | 131 +- {amd => OpenCL}/m00900_a0.cl | 94 +- {amd => OpenCL}/m00900_a1.cl | 100 +- {amd => OpenCL}/m00900_a3.cl | 115 +- {amd => OpenCL}/m01000_a0.cl | 114 +- {amd => OpenCL}/m01000_a1.cl | 120 +- {amd => OpenCL}/m01000_a3.cl | 119 +- {amd => OpenCL}/m01100_a0.cl | 110 +- {amd => OpenCL}/m01100_a1.cl | 116 +- {amd => OpenCL}/m01100_a3.cl | 121 +- {amd => OpenCL}/m01400_a0.cl | 178 +- {amd => OpenCL}/m01400_a1.cl | 184 +- {amd => OpenCL}/m01400_a3.cl | 198 +- {amd => OpenCL}/m01410_a0.cl | 178 +- {amd => OpenCL}/m01410_a1.cl | 184 +- 
{amd => OpenCL}/m01410_a3.cl | 197 +- {amd => OpenCL}/m01420_a0.cl | 178 +- {amd => OpenCL}/m01420_a1.cl | 184 +- {amd => OpenCL}/m01420_a3.cl | 224 +- {amd => OpenCL}/m01430_a0.cl | 194 +- {amd => OpenCL}/m01430_a1.cl | 200 +- {amd => OpenCL}/m01430_a3.cl | 197 +- {amd => OpenCL}/m01440_a0.cl | 194 +- {amd => OpenCL}/m01440_a1.cl | 200 +- {amd => OpenCL}/m01440_a3.cl | 224 +- {amd => OpenCL}/m01450_a0.cl | 162 +- {amd => OpenCL}/m01450_a1.cl | 168 +- {amd => OpenCL}/m01450_a3.cl | 192 +- {amd => OpenCL}/m01460_a0.cl | 162 +- {amd => OpenCL}/m01460_a1.cl | 168 +- {amd => OpenCL}/m01460_a3.cl | 192 +- {amd => OpenCL}/m01500_a0.cl | 128 +- {amd => OpenCL}/m01500_a1.cl | 138 +- {amd => OpenCL}/m01500_a3.cl | 53 +- {amd => OpenCL}/m01600.cl | 116 +- {amd => OpenCL}/m01700_a0.cl | 150 +- {amd => OpenCL}/m01700_a1.cl | 156 +- {amd => OpenCL}/m01700_a3.cl | 161 +- {amd => OpenCL}/m01710_a0.cl | 150 +- {amd => OpenCL}/m01710_a1.cl | 156 +- {amd => OpenCL}/m01710_a3.cl | 161 +- {amd => OpenCL}/m01720_a0.cl | 150 +- {amd => OpenCL}/m01720_a1.cl | 156 +- {amd => OpenCL}/m01720_a3.cl | 180 +- {amd => OpenCL}/m01730_a0.cl | 150 +- {amd => OpenCL}/m01730_a1.cl | 156 +- {amd => OpenCL}/m01730_a3.cl | 161 +- {amd => OpenCL}/m01740_a0.cl | 150 +- {amd => OpenCL}/m01740_a1.cl | 156 +- {amd => OpenCL}/m01740_a3.cl | 180 +- {amd => OpenCL}/m01750_a0.cl | 178 +- {amd => OpenCL}/m01750_a1.cl | 184 +- {amd => OpenCL}/m01750_a3.cl | 208 +- {amd => OpenCL}/m01760_a0.cl | 178 +- {amd => OpenCL}/m01760_a1.cl | 184 +- {amd => OpenCL}/m01760_a3.cl | 208 +- {amd => OpenCL}/m01800.cl | 156 +- {amd => OpenCL}/m02100.cl | 122 +- {amd => OpenCL}/m02400_a0.cl | 94 +- {amd => OpenCL}/m02400_a1.cl | 104 +- {amd => OpenCL}/m02400_a3.cl | 109 +- {amd => OpenCL}/m02410_a0.cl | 94 +- {amd => OpenCL}/m02410_a1.cl | 104 +- {amd => OpenCL}/m02410_a3.cl | 109 +- {amd => OpenCL}/m02500.cl | 192 +- {amd => OpenCL}/m02610_a0.cl | 126 +- {amd => OpenCL}/m02610_a1.cl | 132 +- {amd => OpenCL}/m02610_a3.cl | 156 +- 
{amd => OpenCL}/m02710_a0.cl | 142 +- {amd => OpenCL}/m02710_a1.cl | 148 +- {amd => OpenCL}/m02710_a3.cl | 172 +- {amd => OpenCL}/m02810_a0.cl | 142 +- {amd => OpenCL}/m02810_a1.cl | 148 +- {amd => OpenCL}/m02810_a3.cl | 172 +- {amd => OpenCL}/m03000_a0.cl | 146 +- {amd => OpenCL}/m03000_a1.cl | 156 +- {amd => OpenCL}/m03000_a3.cl | 53 +- {amd => OpenCL}/m03100_a0.cl | 144 +- {amd => OpenCL}/m03100_a1.cl | 150 +- {amd => OpenCL}/m03100_a3.cl | 141 +- {amd => OpenCL}/m03200.cl | 96 +- {amd => OpenCL}/m03710_a0.cl | 110 +- {amd => OpenCL}/m03710_a1.cl | 116 +- {amd => OpenCL}/m03710_a3.cl | 140 +- {amd => OpenCL}/m03800_a0.cl | 110 +- {amd => OpenCL}/m03800_a1.cl | 116 +- {amd => OpenCL}/m03800_a3.cl | 140 +- {amd => OpenCL}/m04310_a0.cl | 126 +- {amd => OpenCL}/m04310_a1.cl | 132 +- {amd => OpenCL}/m04310_a3.cl | 156 +- {amd => OpenCL}/m04400_a0.cl | 166 +- {amd => OpenCL}/m04400_a1.cl | 172 +- {amd => OpenCL}/m04400_a3.cl | 196 +- {amd => OpenCL}/m04500_a0.cl | 170 +- {amd => OpenCL}/m04500_a1.cl | 176 +- {amd => OpenCL}/m04500_a3.cl | 200 +- {amd => OpenCL}/m04700_a0.cl | 166 +- {amd => OpenCL}/m04700_a1.cl | 172 +- {amd => OpenCL}/m04700_a3.cl | 196 +- {amd => OpenCL}/m04800_a0.cl | 94 +- {amd => OpenCL}/m04800_a1.cl | 100 +- {amd => OpenCL}/m04800_a3.cl | 144 +- {amd => OpenCL}/m04900_a0.cl | 162 +- {amd => OpenCL}/m04900_a1.cl | 168 +- {amd => OpenCL}/m04900_a3.cl | 208 +- {amd => OpenCL}/m05000_a0.cl | 101 +- {amd => OpenCL}/m05000_a1.cl | 107 +- {amd => OpenCL}/m05000_a3.cl | 131 +- {amd => OpenCL}/m05100_a0.cl | 129 +- {amd => OpenCL}/m05100_a1.cl | 135 +- {amd => OpenCL}/m05100_a3.cl | 159 +- {amd => OpenCL}/m05200.cl | 104 +- {amd => OpenCL}/m05300_a0.cl | 149 +- {amd => OpenCL}/m05300_a1.cl | 155 +- {amd => OpenCL}/m05300_a3.cl | 179 +- {amd => OpenCL}/m05400_a0.cl | 156 +- {amd => OpenCL}/m05400_a1.cl | 162 +- {amd => OpenCL}/m05400_a3.cl | 186 +- {amd => OpenCL}/m05500_a0.cl | 174 +- {amd => OpenCL}/m05500_a1.cl | 180 +- {amd => OpenCL}/m05500_a3.cl | 
169 +- {amd => OpenCL}/m05600_a0.cl | 193 +- {amd => OpenCL}/m05600_a1.cl | 199 +- {amd => OpenCL}/m05600_a3.cl | 207 +- {amd => OpenCL}/m05800.cl | 116 +- {amd => OpenCL}/m06000_a0.cl | 113 +- {amd => OpenCL}/m06000_a1.cl | 119 +- {amd => OpenCL}/m06000_a3.cl | 143 +- {amd => OpenCL}/m06100_a0.cl | 151 +- {amd => OpenCL}/m06100_a1.cl | 157 +- {amd => OpenCL}/m06100_a3.cl | 181 +- {amd => OpenCL}/m06211.cl | 92 +- {amd => OpenCL}/m06212.cl | 98 +- {amd => OpenCL}/m06213.cl | 102 +- {amd => OpenCL}/m06221.cl | 30 +- {amd => OpenCL}/m06222.cl | 36 +- {amd => OpenCL}/m06223.cl | 40 +- {amd => OpenCL}/m06231.cl | 30 +- {amd => OpenCL}/m06232.cl | 36 +- {amd => OpenCL}/m06233.cl | 40 +- {amd => OpenCL}/m06300.cl | 114 +- {amd => OpenCL}/m06400.cl | 134 +- {amd => OpenCL}/m06500.cl | 142 +- {amd => OpenCL}/m06600.cl | 120 +- {amd => OpenCL}/m06700.cl | 122 +- {amd => OpenCL}/m06800.cl | 118 +- {amd => OpenCL}/m06900_a0.cl | 123 +- {amd => OpenCL}/m06900_a1.cl | 129 +- {amd => OpenCL}/m06900_a3.cl | 137 +- {amd => OpenCL}/m07100.cl | 40 +- {amd => OpenCL}/m07300_a0.cl | 156 +- {amd => OpenCL}/m07300_a1.cl | 162 +- {amd => OpenCL}/m07300_a3.cl | 186 +- {amd => OpenCL}/m07400.cl | 164 +- {amd => OpenCL}/m07500_a0.cl | 122 +- {amd => OpenCL}/m07500_a1.cl | 128 +- {amd => OpenCL}/m07500_a3.cl | 128 +- {amd => OpenCL}/m07600_a0.cl | 198 +- {amd => OpenCL}/m07600_a1.cl | 204 +- {amd => OpenCL}/m07600_a3.cl | 228 +- {amd => OpenCL}/m07700_a0.cl | 98 +- {amd => OpenCL}/m07700_a1.cl | 92 +- {amd => OpenCL}/m07700_a3.cl | 112 +- {amd => OpenCL}/m07800_a0.cl | 132 +- {amd => OpenCL}/m07800_a1.cl | 138 +- {amd => OpenCL}/m07800_a3.cl | 146 +- {amd => OpenCL}/m07900.cl | 44 +- {amd => OpenCL}/m08000_a0.cl | 154 +- {amd => OpenCL}/m08000_a1.cl | 160 +- {amd => OpenCL}/m08000_a3.cl | 149 +- {amd => OpenCL}/m08100_a0.cl | 109 +- {amd => OpenCL}/m08100_a1.cl | 115 +- {amd => OpenCL}/m08100_a3.cl | 139 +- {amd => OpenCL}/m08200.cl | 94 +- {amd => OpenCL}/m08300_a0.cl | 176 +- {amd => 
OpenCL}/m08300_a1.cl | 182 +- {amd => OpenCL}/m08300_a3.cl | 206 +- {amd => OpenCL}/m08400_a0.cl | 168 +- {amd => OpenCL}/m08400_a1.cl | 174 +- {amd => OpenCL}/m08400_a3.cl | 198 +- {amd => OpenCL}/m08500_a0.cl | 124 +- {amd => OpenCL}/m08500_a1.cl | 134 +- {amd => OpenCL}/m08500_a3.cl | 143 +- {amd => OpenCL}/m08600_a0.cl | 120 +- {amd => OpenCL}/m08600_a1.cl | 110 +- {amd => OpenCL}/m08600_a3.cl | 129 +- {amd => OpenCL}/m08700_a0.cl | 162 +- {amd => OpenCL}/m08700_a1.cl | 152 +- {amd => OpenCL}/m08700_a3.cl | 171 +- {amd => OpenCL}/m08800.cl | 180 +- {amd => OpenCL}/m08900.cl | 110 +- {amd => OpenCL}/m09000.cl | 136 +- {amd => OpenCL}/m09100.cl | 186 +- {amd => OpenCL}/m09400.cl | 130 +- {amd => OpenCL}/m09500.cl | 126 +- {amd => OpenCL}/m09600.cl | 84 +- {amd => OpenCL}/m09700_a0.cl | 172 +- {amd => OpenCL}/m09700_a1.cl | 178 +- {amd => OpenCL}/m09700_a3.cl | 191 +- {amd => OpenCL}/m09710_a0.cl | 150 +- {amd => OpenCL}/m09710_a1.cl | 140 +- {amd => OpenCL}/m09710_a3.cl | 143 +- {amd => OpenCL}/m09720_a0.cl | 172 +- {amd => OpenCL}/m09720_a1.cl | 178 +- {amd => OpenCL}/m09720_a3.cl | 202 +- {amd => OpenCL}/m09800_a0.cl | 152 +- {amd => OpenCL}/m09800_a1.cl | 158 +- {amd => OpenCL}/m09800_a3.cl | 177 +- {amd => OpenCL}/m09810_a0.cl | 152 +- {amd => OpenCL}/m09810_a1.cl | 146 +- {amd => OpenCL}/m09810_a3.cl | 177 +- {amd => OpenCL}/m09820_a0.cl | 152 +- {amd => OpenCL}/m09820_a1.cl | 158 +- {amd => OpenCL}/m09820_a3.cl | 182 +- {amd => OpenCL}/m09900_a0.cl | 126 +- {amd => OpenCL}/m09900_a1.cl | 132 +- {amd => OpenCL}/m09900_a3.cl | 137 +- {amd => OpenCL}/m10100_a0.cl | 102 +- {amd => OpenCL}/m10100_a1.cl | 104 +- {amd => OpenCL}/m10100_a3.cl | 121 +- {amd => OpenCL}/m10300.cl | 110 +- {amd => OpenCL}/m10400_a0.cl | 145 +- {amd => OpenCL}/m10400_a1.cl | 151 +- {amd => OpenCL}/m10400_a3.cl | 175 +- {amd => OpenCL}/m10410_a0.cl | 81 +- {amd => OpenCL}/m10410_a1.cl | 75 +- {amd => OpenCL}/m10410_a3.cl | 111 +- {amd => OpenCL}/m10420_a0.cl | 150 +- {amd => 
OpenCL}/m10420_a1.cl | 156 +- {amd => OpenCL}/m10420_a3.cl | 180 +- {amd => OpenCL}/m10500.cl | 112 +- {amd => OpenCL}/m10700.cl | 200 +- {amd => OpenCL}/m10800_a0.cl | 150 +- {amd => OpenCL}/m10800_a1.cl | 156 +- {amd => OpenCL}/m10800_a3.cl | 161 +- {amd => OpenCL}/m10900.cl | 92 +- {amd => OpenCL}/m11000_a0.cl | 126 +- {amd => OpenCL}/m11000_a1.cl | 132 +- {amd => OpenCL}/m11000_a3.cl | 156 +- {amd => OpenCL}/m11100_a0.cl | 110 +- {amd => OpenCL}/m11100_a1.cl | 100 +- {amd => OpenCL}/m11100_a3.cl | 140 +- {amd => OpenCL}/m11200_a0.cl | 186 +- {amd => OpenCL}/m11200_a1.cl | 192 +- {amd => OpenCL}/m11200_a3.cl | 216 +- {amd => OpenCL}/m11300.cl | 94 +- {amd => OpenCL}/m11400_a0.cl | 166 +- {amd => OpenCL}/m11400_a1.cl | 172 +- {amd => OpenCL}/m11400_a3.cl | 444 +- {amd => OpenCL}/m11500_a0.cl | 100 +- {amd => OpenCL}/m11500_a1.cl | 90 +- {amd => OpenCL}/m11500_a3.cl | 111 +- {amd => OpenCL}/m11600.cl | 142 +- {amd => OpenCL}/m11700_a0.cl | 50 +- {amd => OpenCL}/m11700_a1.cl | 56 +- {amd => OpenCL}/m11700_a3.cl | 56 +- {amd => OpenCL}/m11800_a0.cl | 50 +- {amd => OpenCL}/m11800_a1.cl | 56 +- {amd => OpenCL}/m11800_a3.cl | 56 +- {amd => OpenCL}/m11900.cl | 88 +- {amd => OpenCL}/m12000.cl | 86 +- {amd => OpenCL}/m12200.cl | 38 +- {amd => OpenCL}/m12300.cl | 36 +- {amd => OpenCL}/m12400.cl | 86 +- {amd => OpenCL}/m12500.cl | 82 +- {amd => OpenCL}/m12600_a0.cl | 178 +- {amd => OpenCL}/m12600_a1.cl | 184 +- {amd => OpenCL}/m12600_a3.cl | 208 +- {amd => OpenCL}/m12700.cl | 126 +- {amd => OpenCL}/m12800.cl | 104 +- amd/markov_be_v1.cl => OpenCL/markov_be.cl | 10 +- amd/markov_le_v1.cl => OpenCL/markov_le.cl | 4 +- nv/rp_nv.c => OpenCL/rp.c | 1967 +- amd/types_amd.c => OpenCL/types_ocl.c | 425 +- amd/amp_a0_v2.cl | 52 - amd/amp_a0_v4.cl | 52 - amd/amp_a1_v2.cl | 587 - amd/amp_a1_v4.cl | 587 - amd/amp_a3_v1.cl | 62 - amd/amp_a3_v4.cl | 62 - amd/check_multi_vect1_comp4_warp.c | 34 - amd/check_multi_vect1_comp4_warp_bs.c | 34 - amd/check_multi_vect2_comp4.c | 67 - 
amd/check_multi_vect2_comp4_warp.c | 67 - amd/check_multi_vect4_comp4.c | 133 - amd/check_multi_vect4_comp4_warp.c | 133 - amd/check_single_vect1_comp4_warp.c | 14 - amd/check_single_vect1_comp4_warp_bs.c | 3 - amd/check_single_vect2_comp4.c | 29 - amd/check_single_vect2_comp4_warp.c | 29 - amd/check_single_vect4_comp4.c | 59 - amd/check_single_vect4_comp4_warp.c | 59 - amd/common_amd.c | 15303 ---------------- amd/markov_be_v2.cl | 178 - amd/markov_be_v4.cl | 184 - amd/markov_le_v2.cl | 144 - amd/markov_le_v4.cl | 184 - amd/rp_amd.c | 2838 --- docs/changes.txt | 9 +- docs/readme.txt | 2 +- include/constants.h | 370 +- include/ext_ADL.h | 2 +- include/ext_OpenCL.h | 4 +- include/ext_nvapi.h | 2 +- include/ext_nvml.h | 2 +- include/kernel_functions.c | 4 +- include/kernel_vendor.h | 90 +- include/shared.h | 29 +- include/types.h | 76 +- nv/amp_a0_v1.cu | 58 - nv/amp_a0_v2.cu | 58 - nv/amp_a0_v4.cu | 58 - nv/amp_a1_v1.cu | 702 - nv/amp_a1_v2.cu | 702 - nv/amp_a1_v4.cu | 702 - nv/amp_a3_v1.cu | 63 - nv/amp_a3_v2.cu | 63 - nv/amp_a3_v4.cu | 63 - nv/check_multi_vect1_comp4_warp.c | 34 - nv/check_multi_vect1_comp4_warp_bs.c | 34 - nv/check_multi_vect2_comp4.c | 67 - nv/check_multi_vect2_comp4_warp.c | 67 - nv/check_multi_vect4_comp4.c | 133 - nv/check_multi_vect4_comp4_warp.c | 133 - nv/check_single_vect1_comp4.c | 14 - nv/check_single_vect1_comp4_warp.c | 14 - nv/check_single_vect1_comp4_warp_bs.c | 3 - nv/check_single_vect2_comp4.c | 29 - nv/check_single_vect2_comp4_warp.c | 29 - nv/check_single_vect4_comp4.c | 59 - nv/check_single_vect4_comp4_warp.c | 59 - nv/common_nv.c | 15025 --------------- nv/gpu_aes256_nv.c | 1048 -- nv/gpu_serpent256_nv.c | 583 - nv/gpu_twofish256_nv.c | 466 - nv/m00000_a0.cu | 392 - nv/m00000_a1.cu | 494 - nv/m00000_a3.cu | 703 - nv/m00010_a0.cu | 548 - nv/m00010_a1.cu | 614 - nv/m00010_a3.cu | 759 - nv/m00020_a0.cu | 506 - nv/m00020_a1.cu | 602 - nv/m00020_a3.cu | 728 - nv/m00030_a0.cu | 558 - nv/m00030_a1.cu | 652 - nv/m00030_a3.cu | 755 - 
nv/m00040_a0.cu | 486 - nv/m00040_a1.cu | 580 - nv/m00040_a3.cu | 724 - nv/m00050_a0.cu | 596 - nv/m00050_a1.cu | 702 - nv/m00050_a3.cu | 766 - nv/m00060_a0.cu | 568 - nv/m00060_a1.cu | 674 - nv/m00060_a3.cu | 738 - nv/m00100_a0.cu | 488 - nv/m00100_a1.cu | 598 - nv/m00100_a3.cu | 830 - nv/m00110_a0.cu | 640 - nv/m00110_a1.cu | 706 - nv/m00110_a3.cu | 887 - nv/m00120_a0.cu | 598 - nv/m00120_a1.cu | 692 - nv/m00120_a3.cu | 996 - nv/m00130_a0.cu | 654 - nv/m00130_a1.cu | 748 - nv/m00130_a3.cu | 887 - nv/m00140_a0.cu | 582 - nv/m00140_a1.cu | 676 - nv/m00140_a3.cu | 997 - nv/m00150_a0.cu | 600 - nv/m00150_a1.cu | 706 - nv/m00150_a3.cu | 770 - nv/m00160_a0.cu | 600 - nv/m00160_a1.cu | 706 - nv/m00160_a3.cu | 767 - nv/m00190_a0.cu | 513 - nv/m00190_a1.cu | 623 - nv/m00190_a3.cu | 851 - nv/m00200_a0.cu | 361 - nv/m00200_a1.cu | 411 - nv/m00200_a3.cu | 490 - nv/m00300_a0.cu | 738 - nv/m00300_a1.cu | 848 - nv/m00300_a3.cu | 1078 -- nv/m00400.cu | 358 - nv/m00500.cu | 1174 -- nv/m00900_a0.cu | 347 - nv/m00900_a1.cu | 449 - nv/m00900_a3.cu | 630 - nv/m01000_a0.cu | 367 - nv/m01000_a1.cu | 473 - nv/m01000_a3.cu | 634 - nv/m01100_a0.cu | 578 - nv/m01100_a1.cu | 684 - nv/m01100_a3.cu | 727 - nv/m01400_a0.cu | 429 - nv/m01400_a1.cu | 527 - nv/m01400_a3.cu | 538 - nv/m01410_a0.cu | 581 - nv/m01410_a1.cu | 635 - nv/m01410_a3.cu | 595 - nv/m01420_a0.cu | 503 - nv/m01420_a1.cu | 585 - nv/m01420_a3.cu | 757 - nv/m01430_a0.cu | 591 - nv/m01430_a1.cu | 673 - nv/m01430_a3.cu | 595 - nv/m01440_a0.cu | 507 - nv/m01440_a1.cu | 601 - nv/m01440_a3.cu | 757 - nv/m01450_a0.cu | 589 - nv/m01450_a1.cu | 704 - nv/m01450_a3.cu | 759 - nv/m01460_a0.cu | 589 - nv/m01460_a1.cu | 695 - nv/m01460_a3.cu | 755 - nv/m01500_a0.cu | 766 - nv/m01500_a1.cu | 886 - nv/m01500_a3.cu | 2051 --- nv/m01600.cu | 1187 -- nv/m01700_a0.cu | 431 - nv/m01700_a1.cu | 529 - nv/m01700_a3.cu | 540 - nv/m01710_a0.cu | 583 - nv/m01710_a1.cu | 637 - nv/m01710_a3.cu | 597 - nv/m01720_a0.cu | 505 - nv/m01720_a1.cu | 587 - 
nv/m01720_a3.cu | 749 - nv/m01730_a0.cu | 583 - nv/m01730_a1.cu | 665 - nv/m01730_a3.cu | 598 - nv/m01740_a0.cu | 499 - nv/m01740_a1.cu | 593 - nv/m01740_a3.cu | 749 - nv/m01750_a0.cu | 622 - nv/m01750_a1.cu | 728 - nv/m01750_a3.cu | 792 - nv/m01760_a0.cu | 622 - nv/m01760_a1.cu | 728 - nv/m01760_a3.cu | 788 - nv/m01800.cu | 566 - nv/m02100.cu | 629 - nv/m02400_a0.cu | 388 - nv/m02400_a1.cu | 508 - nv/m02400_a3.cu | 535 - nv/m02410_a0.cu | 496 - nv/m02410_a1.cu | 606 - nv/m02410_a3.cu | 625 - nv/m02500.cu | 920 - nv/m02610_a0.cu | 657 - nv/m02610_a1.cu | 759 - nv/m02610_a3.cu | 861 - nv/m02710_a0.cu | 830 - nv/m02710_a1.cu | 932 - nv/m02710_a3.cu | 1034 -- nv/m02810_a0.cu | 832 - nv/m02810_a1.cu | 930 - nv/m02810_a3.cu | 1032 -- nv/m03000_a0.cu | 796 - nv/m03000_a1.cu | 918 - nv/m03000_a3.cu | 1994 -- nv/m03100_a0.cu | 1063 -- nv/m03100_a1.cu | 1159 -- nv/m03100_a3.cu | 1363 -- nv/m03200.cu | 895 - nv/m03710_a0.cu | 766 - nv/m03710_a1.cu | 867 - nv/m03710_a3.cu | 999 - nv/m03800_a0.cu | 668 - nv/m03800_a1.cu | 772 - nv/m03800_a3.cu | 842 - nv/m04310_a0.cu | 657 - nv/m04310_a1.cu | 759 - nv/m04310_a3.cu | 861 - nv/m04400_a0.cu | 733 - nv/m04400_a1.cu | 843 - nv/m04400_a3.cu | 972 - nv/m04500_a0.cu | 801 - nv/m04500_a1.cu | 910 - nv/m04500_a3.cu | 1039 -- nv/m04700_a0.cu | 707 - nv/m04700_a1.cu | 817 - nv/m04700_a3.cu | 946 - nv/m04800_a0.cu | 546 - nv/m04800_a1.cu | 642 - nv/m04800_a3.cu | 773 - nv/m04900_a0.cu | 701 - nv/m04900_a1.cu | 812 - nv/m04900_a3.cu | 936 - nv/m05000_a0.cu | 528 - nv/m05000_a1.cu | 634 - nv/m05000_a3.cu | 695 - nv/m05100_a0.cu | 431 - nv/m05100_a1.cu | 533 - nv/m05100_a3.cu | 605 - nv/m05200.cu | 387 - nv/m05300_a0.cu | 748 - nv/m05300_a1.cu | 854 - nv/m05300_a3.cu | 976 - nv/m05400_a0.cu | 782 - nv/m05400_a1.cu | 888 - nv/m05400_a3.cu | 1010 - nv/m05500_a0.cu | 959 - nv/m05500_a1.cu | 1065 -- nv/m05500_a3.cu | 1077 -- nv/m05600_a0.cu | 844 - nv/m05600_a1.cu | 950 - nv/m05600_a3.cu | 958 - nv/m05800.cu | 729 - nv/m06000_a0.cu | 482 - 
nv/m06000_a1.cu | 588 - nv/m06000_a3.cu | 660 - nv/m06100_a0.cu | 1619 -- nv/m06100_a1.cu | 1725 -- nv/m06100_a3.cu | 1836 -- nv/m06211.cu | 717 - nv/m06212.cu | 790 - nv/m06213.cu | 848 - nv/m06221.cu | 621 - nv/m06222.cu | 694 - nv/m06223.cu | 752 - nv/m06231.cu | 1990 -- nv/m06232.cu | 2063 --- nv/m06233.cu | 2121 --- nv/m06300.cu | 1081 -- nv/m06400.cu | 555 - nv/m06500.cu | 583 - nv/m06600.cu | 1414 -- nv/m06700.cu | 532 - nv/m06800.cu | 1603 -- nv/m06900_a0.cu | 1215 -- nv/m06900_a1.cu | 1321 -- nv/m06900_a3.cu | 1331 -- nv/m07100.cu | 503 - nv/m07300_a0.cu | 621 - nv/m07300_a1.cu | 727 - nv/m07300_a3.cu | 791 - nv/m07400.cu | 1283 -- nv/m07500_a0.cu | 813 - nv/m07500_a1.cu | 919 - nv/m07500_a3.cu | 826 - nv/m07600_a0.cu | 1180 -- nv/m07600_a1.cu | 1290 -- nv/m07600_a3.cu | 1419 -- nv/m07700_a0.cu | 901 - nv/m07700_a1.cu | 973 - nv/m07700_a3.cu | 985 - nv/m07800_a0.cu | 764 - nv/m07800_a1.cu | 866 - nv/m07800_a3.cu | 860 - nv/m07900.cu | 387 - nv/m08000_a0.cu | 515 - nv/m08000_a1.cu | 601 - nv/m08000_a3.cu | 596 - nv/m08100_a0.cu | 547 - nv/m08100_a1.cu | 641 - nv/m08100_a3.cu | 855 - nv/m08200.cu | 793 - nv/m08300_a0.cu | 772 - nv/m08300_a1.cu | 866 - nv/m08300_a3.cu | 954 - nv/m08400_a0.cu | 766 - nv/m08400_a1.cu | 876 - nv/m08400_a3.cu | 1013 - nv/m08500_a0.cu | 837 - nv/m08500_a1.cu | 944 - nv/m08500_a3.cu | 863 - nv/m08600_a0.cu | 555 - nv/m08600_a1.cu | 605 - nv/m08600_a3.cu | 722 - nv/m08700_a0.cu | 723 - nv/m08700_a1.cu | 774 - nv/m08700_a3.cu | 919 - nv/m08800.cu | 1963 -- nv/m08900.cu | 1179 -- nv/m09000.cu | 821 - nv/m09100.cu | 989 - nv/m09400.cu | 1844 -- nv/m09500.cu | 1421 -- nv/m09600.cu | 1490 -- nv/m09700_a0.cu | 1047 -- nv/m09700_a1.cu | 1165 -- nv/m09700_a3.cu | 1552 -- nv/m09710_a0.cu | 625 - nv/m09710_a1.cu | 667 - nv/m09710_a3.cu | 618 - nv/m09720_a0.cu | 762 - nv/m09720_a1.cu | 873 - nv/m09720_a3.cu | 952 - nv/m09800_a0.cu | 769 - nv/m09800_a1.cu | 875 - nv/m09800_a3.cu | 928 - nv/m09810_a0.cu | 611 - nv/m09810_a1.cu | 657 - 
nv/m09810_a3.cu | 784 - nv/m09820_a0.cu | 519 - nv/m09820_a1.cu | 625 - nv/m09820_a3.cu | 646 - nv/m09900_a0.cu | 586 - nv/m09900_a1.cu | 692 - nv/m09900_a3.cu | 851 - nv/m10100_a0.cu | 325 - nv/m10100_a1.cu | 431 - nv/m10100_a3.cu | 482 - nv/m10300.cu | 421 - nv/m10400_a0.cu | 679 - nv/m10400_a1.cu | 785 - nv/m10400_a3.cu | 849 - nv/m10410_a0.cu | 380 - nv/m10410_a1.cu | 432 - nv/m10410_a3.cu | 552 - nv/m10420_a0.cu | 536 - nv/m10420_a1.cu | 646 - nv/m10420_a3.cu | 711 - nv/m10500.cu | 574 - nv/m10700.cu | 1720 -- nv/m10800_a0.cu | 429 - nv/m10800_a1.cu | 527 - nv/m10800_a3.cu | 538 - nv/m10900.cu | 480 - nv/m11000_a0.cu | 713 - nv/m11000_a1.cu | 805 - nv/m11000_a3.cu | 889 - nv/m11100_a0.cu | 805 - nv/m11100_a1.cu | 855 - nv/m11100_a3.cu | 1027 -- nv/m11200_a0.cu | 1043 -- nv/m11200_a1.cu | 1153 -- nv/m11200_a3.cu | 1233 -- nv/m11300.cu | 1356 -- nv/m11400_a0.cu | 2322 --- nv/m11400_a1.cu | 2428 --- nv/m11400_a3.cu | 6095 ------ nv/m11500_a0.cu | 366 - nv/m11500_a1.cu | 444 - nv/m11500_a3.cu | 516 - nv/m11600.cu | 1923 -- nv/m11700_a0.cu | 2675 --- nv/m11700_a1.cu | 2783 --- nv/m11700_a3.cu | 2993 --- nv/m11800_a0.cu | 2674 --- nv/m11800_a1.cu | 2785 --- nv/m11800_a3.cu | 2993 --- nv/m11900.cu | 417 - nv/m12000.cu | 460 - nv/m12200.cu | 335 - nv/m12300.cu | 530 - nv/m12400.cu | 778 - nv/m12500.cu | 1307 -- nv/m12600_a0.cu | 797 - nv/m12600_a1.cu | 907 - nv/m12600_a3.cu | 1036 -- nv/m12700.cu | 1557 -- nv/m12800.cu | 631 - nv/markov_be_v1.cu | 127 - nv/markov_be_v2.cu | 142 - nv/markov_be_v4.cu | 182 - nv/markov_le_v1.cu | 127 - nv/markov_le_v2.cu | 142 - nv/markov_le_v4.cu | 182 - nv/types_nv.c | 1402 -- src/Makefile | 298 +- src/ext_OpenCL.c | 5 +- src/oclHashcat.c | 2636 +-- src/shared.c | 270 +- 731 files changed, 29560 insertions(+), 350184 deletions(-) rename amd/amp_a0_v1.cl => OpenCL/amp_a0.cl (85%) rename amd/amp_a1_v1.cl => OpenCL/amp_a1.cl (67%) rename amd/amp_a3_v2.cl => OpenCL/amp_a3.cl (93%) rename amd/check_multi_vect1_comp4.c => 
OpenCL/check_multi_comp4.c (90%) rename nv/check_multi_vect1_comp4.c => OpenCL/check_multi_comp4_bs.c (82%) rename amd/check_single_vect1_comp4.c => OpenCL/check_single_comp4.c (76%) create mode 100644 OpenCL/check_single_comp4_bs.c create mode 100644 OpenCL/common.c rename {amd => OpenCL}/gpu_aes256_amd.c (100%) rename {amd => OpenCL}/gpu_serpent256_amd.c (100%) rename {amd => OpenCL}/gpu_twofish256_amd.c (100%) rename {amd => OpenCL}/m00000_a0.cl (92%) rename {amd => OpenCL}/m00000_a1.cl (92%) rename {amd => OpenCL}/m00000_a3.cl (73%) rename {amd => OpenCL}/m00010_a0.cl (93%) rename {amd => OpenCL}/m00010_a1.cl (94%) rename {amd => OpenCL}/m00010_a3.cl (74%) rename {amd => OpenCL}/m00020_a0.cl (93%) rename {amd => OpenCL}/m00020_a1.cl (93%) rename {amd => OpenCL}/m00020_a3.cl (87%) rename {amd => OpenCL}/m00030_a0.cl (93%) rename {amd => OpenCL}/m00030_a1.cl (93%) rename {amd => OpenCL}/m00030_a3.cl (74%) rename {amd => OpenCL}/m00040_a0.cl (93%) rename {amd => OpenCL}/m00040_a1.cl (93%) rename {amd => OpenCL}/m00040_a3.cl (87%) rename {amd => OpenCL}/m00050_a0.cl (89%) rename {amd => OpenCL}/m00050_a1.cl (90%) rename {amd => OpenCL}/m00050_a3.cl (84%) rename {amd => OpenCL}/m00060_a0.cl (89%) rename {amd => OpenCL}/m00060_a1.cl (90%) rename {amd => OpenCL}/m00060_a3.cl (83%) rename {amd => OpenCL}/m00100_a0.cl (89%) rename {amd => OpenCL}/m00100_a1.cl (90%) rename {amd => OpenCL}/m00100_a3.cl (73%) rename {amd => OpenCL}/m00110_a0.cl (90%) rename {amd => OpenCL}/m00110_a1.cl (91%) rename {amd => OpenCL}/m00110_a3.cl (74%) rename {amd => OpenCL}/m00120_a0.cl (95%) rename {amd => OpenCL}/m00120_a1.cl (95%) rename {amd => OpenCL}/m00120_a3.cl (91%) rename {amd => OpenCL}/m00130_a0.cl (95%) rename {amd => OpenCL}/m00130_a1.cl (95%) rename {amd => OpenCL}/m00130_a3.cl (74%) rename {amd => OpenCL}/m00140_a0.cl (95%) rename {amd => OpenCL}/m00140_a1.cl (95%) rename {amd => OpenCL}/m00140_a3.cl (91%) rename {amd => OpenCL}/m00150_a0.cl (90%) rename {amd => 
OpenCL}/m00150_a1.cl (91%) rename {amd => OpenCL}/m00150_a3.cl (85%) rename {amd => OpenCL}/m00160_a0.cl (90%) rename {amd => OpenCL}/m00160_a1.cl (91%) rename {amd => OpenCL}/m00160_a3.cl (85%) rename {amd => OpenCL}/m00190_a0.cl (89%) rename {amd => OpenCL}/m00190_a1.cl (89%) rename {amd => OpenCL}/m00190_a3.cl (72%) rename {amd => OpenCL}/m00200_a0.cl (88%) rename {amd => OpenCL}/m00200_a1.cl (90%) rename {amd => OpenCL}/m00200_a3.cl (57%) rename {amd => OpenCL}/m00300_a0.cl (93%) rename {amd => OpenCL}/m00300_a1.cl (93%) rename {amd => OpenCL}/m00300_a3.cl (80%) rename {amd => OpenCL}/m00400.cl (86%) rename {amd => OpenCL}/m00500.cl (96%) rename {amd => OpenCL}/m00900_a0.cl (92%) rename {amd => OpenCL}/m00900_a1.cl (92%) rename {amd => OpenCL}/m00900_a3.cl (70%) rename {amd => OpenCL}/m01000_a0.cl (91%) rename {amd => OpenCL}/m01000_a1.cl (91%) rename {amd => OpenCL}/m01000_a3.cl (70%) rename {amd => OpenCL}/m01100_a0.cl (94%) rename {amd => OpenCL}/m01100_a1.cl (94%) rename {amd => OpenCL}/m01100_a3.cl (74%) rename {amd => OpenCL}/m01400_a0.cl (90%) rename {amd => OpenCL}/m01400_a1.cl (90%) rename {amd => OpenCL}/m01400_a3.cl (72%) rename {amd => OpenCL}/m01410_a0.cl (90%) rename {amd => OpenCL}/m01410_a1.cl (91%) rename {amd => OpenCL}/m01410_a3.cl (73%) rename {amd => OpenCL}/m01420_a0.cl (90%) rename {amd => OpenCL}/m01420_a1.cl (91%) rename {amd => OpenCL}/m01420_a3.cl (86%) rename {amd => OpenCL}/m01430_a0.cl (90%) rename {amd => OpenCL}/m01430_a1.cl (90%) rename {amd => OpenCL}/m01430_a3.cl (73%) rename {amd => OpenCL}/m01440_a0.cl (89%) rename {amd => OpenCL}/m01440_a1.cl (90%) rename {amd => OpenCL}/m01440_a3.cl (86%) rename {amd => OpenCL}/m01450_a0.cl (88%) rename {amd => OpenCL}/m01450_a1.cl (89%) rename {amd => OpenCL}/m01450_a3.cl (83%) rename {amd => OpenCL}/m01460_a0.cl (88%) rename {amd => OpenCL}/m01460_a1.cl (89%) rename {amd => OpenCL}/m01460_a3.cl (83%) rename {amd => OpenCL}/m01500_a0.cl (93%) rename {amd => OpenCL}/m01500_a1.cl (93%) 
rename {amd => OpenCL}/m01500_a3.cl (98%) rename {amd => OpenCL}/m01600.cl (96%) rename {amd => OpenCL}/m01700_a0.cl (86%) rename {amd => OpenCL}/m01700_a1.cl (88%) rename {amd => OpenCL}/m01700_a3.cl (59%) rename {amd => OpenCL}/m01710_a0.cl (88%) rename {amd => OpenCL}/m01710_a1.cl (89%) rename {amd => OpenCL}/m01710_a3.cl (62%) rename {amd => OpenCL}/m01720_a0.cl (88%) rename {amd => OpenCL}/m01720_a1.cl (89%) rename {amd => OpenCL}/m01720_a3.cl (84%) rename {amd => OpenCL}/m01730_a0.cl (88%) rename {amd => OpenCL}/m01730_a1.cl (89%) rename {amd => OpenCL}/m01730_a3.cl (62%) rename {amd => OpenCL}/m01740_a0.cl (88%) rename {amd => OpenCL}/m01740_a1.cl (89%) rename {amd => OpenCL}/m01740_a3.cl (84%) rename {amd => OpenCL}/m01750_a0.cl (88%) rename {amd => OpenCL}/m01750_a1.cl (89%) rename {amd => OpenCL}/m01750_a3.cl (83%) rename {amd => OpenCL}/m01760_a0.cl (88%) rename {amd => OpenCL}/m01760_a1.cl (89%) rename {amd => OpenCL}/m01760_a3.cl (83%) rename {amd => OpenCL}/m01800.cl (89%) rename {amd => OpenCL}/m02100.cl (92%) rename {amd => OpenCL}/m02400_a0.cl (92%) rename {amd => OpenCL}/m02400_a1.cl (93%) rename {amd => OpenCL}/m02400_a3.cl (67%) rename {amd => OpenCL}/m02410_a0.cl (93%) rename {amd => OpenCL}/m02410_a1.cl (93%) rename {amd => OpenCL}/m02410_a3.cl (69%) rename {amd => OpenCL}/m02500.cl (91%) rename {amd => OpenCL}/m02610_a0.cl (92%) rename {amd => OpenCL}/m02610_a1.cl (93%) rename {amd => OpenCL}/m02610_a3.cl (89%) rename {amd => OpenCL}/m02710_a0.cl (93%) rename {amd => OpenCL}/m02710_a1.cl (93%) rename {amd => OpenCL}/m02710_a3.cl (90%) rename {amd => OpenCL}/m02810_a0.cl (93%) rename {amd => OpenCL}/m02810_a1.cl (93%) rename {amd => OpenCL}/m02810_a3.cl (90%) rename {amd => OpenCL}/m03000_a0.cl (92%) rename {amd => OpenCL}/m03000_a1.cl (92%) rename {amd => OpenCL}/m03000_a3.cl (98%) rename {amd => OpenCL}/m03100_a0.cl (94%) rename {amd => OpenCL}/m03100_a1.cl (94%) rename {amd => OpenCL}/m03100_a3.cl (82%) rename {amd => OpenCL}/m03200.cl 
(95%) rename {amd => OpenCL}/m03710_a0.cl (95%) rename {amd => OpenCL}/m03710_a1.cl (96%) rename {amd => OpenCL}/m03710_a3.cl (92%) rename {amd => OpenCL}/m03800_a0.cl (94%) rename {amd => OpenCL}/m03800_a1.cl (94%) rename {amd => OpenCL}/m03800_a3.cl (88%) rename {amd => OpenCL}/m04310_a0.cl (92%) rename {amd => OpenCL}/m04310_a1.cl (93%) rename {amd => OpenCL}/m04310_a3.cl (89%) rename {amd => OpenCL}/m04400_a0.cl (93%) rename {amd => OpenCL}/m04400_a1.cl (93%) rename {amd => OpenCL}/m04400_a3.cl (91%) rename {amd => OpenCL}/m04500_a0.cl (93%) rename {amd => OpenCL}/m04500_a1.cl (94%) rename {amd => OpenCL}/m04500_a3.cl (92%) rename {amd => OpenCL}/m04700_a0.cl (92%) rename {amd => OpenCL}/m04700_a1.cl (92%) rename {amd => OpenCL}/m04700_a3.cl (90%) rename {amd => OpenCL}/m04800_a0.cl (93%) rename {amd => OpenCL}/m04800_a1.cl (94%) rename {amd => OpenCL}/m04800_a3.cl (87%) rename {amd => OpenCL}/m04900_a0.cl (90%) rename {amd => OpenCL}/m04900_a1.cl (91%) rename {amd => OpenCL}/m04900_a3.cl (87%) rename {amd => OpenCL}/m05000_a0.cl (91%) rename {amd => OpenCL}/m05000_a1.cl (92%) rename {amd => OpenCL}/m05000_a3.cl (85%) rename {amd => OpenCL}/m05100_a0.cl (90%) rename {amd => OpenCL}/m05100_a1.cl (91%) rename {amd => OpenCL}/m05100_a3.cl (84%) rename {amd => OpenCL}/m05200.cl (89%) rename {amd => OpenCL}/m05300_a0.cl (91%) rename {amd => OpenCL}/m05300_a1.cl (92%) rename {amd => OpenCL}/m05300_a3.cl (87%) rename {amd => OpenCL}/m05400_a0.cl (92%) rename {amd => OpenCL}/m05400_a1.cl (92%) rename {amd => OpenCL}/m05400_a3.cl (88%) rename {amd => OpenCL}/m05500_a0.cl (93%) rename {amd => OpenCL}/m05500_a1.cl (93%) rename {amd => OpenCL}/m05500_a3.cl (80%) rename {amd => OpenCL}/m05600_a0.cl (91%) rename {amd => OpenCL}/m05600_a1.cl (91%) rename {amd => OpenCL}/m05600_a3.cl (86%) rename {amd => OpenCL}/m05800.cl (95%) rename {amd => OpenCL}/m06000_a0.cl (94%) rename {amd => OpenCL}/m06000_a1.cl (94%) rename {amd => OpenCL}/m06000_a3.cl (88%) rename {amd => 
OpenCL}/m06100_a0.cl (96%) rename {amd => OpenCL}/m06100_a1.cl (96%) rename {amd => OpenCL}/m06100_a3.cl (94%) rename {amd => OpenCL}/m06211.cl (95%) rename {amd => OpenCL}/m06212.cl (95%) rename {amd => OpenCL}/m06213.cl (94%) rename {amd => OpenCL}/m06221.cl (98%) rename {amd => OpenCL}/m06222.cl (97%) rename {amd => OpenCL}/m06223.cl (97%) rename {amd => OpenCL}/m06231.cl (99%) rename {amd => OpenCL}/m06232.cl (99%) rename {amd => OpenCL}/m06233.cl (99%) rename {amd => OpenCL}/m06300.cl (96%) rename {amd => OpenCL}/m06400.cl (89%) rename {amd => OpenCL}/m06500.cl (89%) rename {amd => OpenCL}/m06600.cl (97%) rename {amd => OpenCL}/m06700.cl (91%) rename {amd => OpenCL}/m06800.cl (97%) rename {amd => OpenCL}/m06900_a0.cl (96%) rename {amd => OpenCL}/m06900_a1.cl (96%) rename {amd => OpenCL}/m06900_a3.cl (93%) rename {amd => OpenCL}/m07100.cl (97%) rename {amd => OpenCL}/m07300_a0.cl (91%) rename {amd => OpenCL}/m07300_a1.cl (91%) rename {amd => OpenCL}/m07300_a3.cl (86%) rename {amd => OpenCL}/m07400.cl (92%) rename {amd => OpenCL}/m07500_a0.cl (93%) rename {amd => OpenCL}/m07500_a1.cl (93%) rename {amd => OpenCL}/m07500_a3.cl (90%) rename {amd => OpenCL}/m07600_a0.cl (95%) rename {amd => OpenCL}/m07600_a1.cl (95%) rename {amd => OpenCL}/m07600_a3.cl (94%) rename {amd => OpenCL}/m07700_a0.cl (95%) rename {amd => OpenCL}/m07700_a1.cl (96%) rename {amd => OpenCL}/m07700_a3.cl (91%) rename {amd => OpenCL}/m07800_a0.cl (94%) rename {amd => OpenCL}/m07800_a1.cl (94%) rename {amd => OpenCL}/m07800_a3.cl (89%) rename {amd => OpenCL}/m07900.cl (95%) rename {amd => OpenCL}/m08000_a0.cl (89%) rename {amd => OpenCL}/m08000_a1.cl (90%) rename {amd => OpenCL}/m08000_a3.cl (64%) rename {amd => OpenCL}/m08100_a0.cl (95%) rename {amd => OpenCL}/m08100_a1.cl (95%) rename {amd => OpenCL}/m08100_a3.cl (91%) rename {amd => OpenCL}/m08200.cl (94%) rename {amd => OpenCL}/m08300_a0.cl (91%) rename {amd => OpenCL}/m08300_a1.cl (92%) rename {amd => OpenCL}/m08300_a3.cl (87%) rename {amd 
=> OpenCL}/m08400_a0.cl (93%) rename {amd => OpenCL}/m08400_a1.cl (93%) rename {amd => OpenCL}/m08400_a3.cl (89%) rename {amd => OpenCL}/m08500_a0.cl (95%) rename {amd => OpenCL}/m08500_a1.cl (95%) rename {amd => OpenCL}/m08500_a3.cl (80%) rename {amd => OpenCL}/m08600_a0.cl (88%) rename {amd => OpenCL}/m08600_a1.cl (90%) rename {amd => OpenCL}/m08600_a3.cl (66%) rename {amd => OpenCL}/m08700_a0.cl (86%) rename {amd => OpenCL}/m08700_a1.cl (88%) rename {amd => OpenCL}/m08700_a3.cl (74%) rename {amd => OpenCL}/m08800.cl (97%) rename {amd => OpenCL}/m08900.cl (94%) rename {amd => OpenCL}/m09000.cl (94%) rename {amd => OpenCL}/m09100.cl (90%) rename {amd => OpenCL}/m09400.cl (98%) rename {amd => OpenCL}/m09500.cl (97%) rename {amd => OpenCL}/m09600.cl (98%) rename {amd => OpenCL}/m09700_a0.cl (92%) rename {amd => OpenCL}/m09700_a1.cl (92%) rename {amd => OpenCL}/m09700_a3.cl (90%) rename {amd => OpenCL}/m09710_a0.cl (90%) rename {amd => OpenCL}/m09710_a1.cl (91%) rename {amd => OpenCL}/m09710_a3.cl (83%) rename {amd => OpenCL}/m09720_a0.cl (90%) rename {amd => OpenCL}/m09720_a1.cl (91%) rename {amd => OpenCL}/m09720_a3.cl (85%) rename {amd => OpenCL}/m09800_a0.cl (92%) rename {amd => OpenCL}/m09800_a1.cl (93%) rename {amd => OpenCL}/m09800_a3.cl (88%) rename {amd => OpenCL}/m09810_a0.cl (91%) rename {amd => OpenCL}/m09810_a1.cl (92%) rename {amd => OpenCL}/m09810_a3.cl (86%) rename {amd => OpenCL}/m09820_a0.cl (90%) rename {amd => OpenCL}/m09820_a1.cl (91%) rename {amd => OpenCL}/m09820_a3.cl (84%) rename {amd => OpenCL}/m09900_a0.cl (94%) rename {amd => OpenCL}/m09900_a1.cl (94%) rename {amd => OpenCL}/m09900_a3.cl (77%) rename {amd => OpenCL}/m10100_a0.cl (87%) rename {amd => OpenCL}/m10100_a1.cl (90%) rename {amd => OpenCL}/m10100_a3.cl (56%) rename {amd => OpenCL}/m10300.cl (91%) rename {amd => OpenCL}/m10400_a0.cl (91%) rename {amd => OpenCL}/m10400_a1.cl (92%) rename {amd => OpenCL}/m10400_a3.cl (85%) rename {amd => OpenCL}/m10410_a0.cl (91%) rename {amd => 
OpenCL}/m10410_a1.cl (92%) rename {amd => OpenCL}/m10410_a3.cl (83%) rename {amd => OpenCL}/m10420_a0.cl (89%) rename {amd => OpenCL}/m10420_a1.cl (90%) rename {amd => OpenCL}/m10420_a3.cl (83%) rename {amd => OpenCL}/m10500.cl (92%) rename {amd => OpenCL}/m10700.cl (94%) rename {amd => OpenCL}/m10800_a0.cl (86%) rename {amd => OpenCL}/m10800_a1.cl (88%) rename {amd => OpenCL}/m10800_a3.cl (59%) rename {amd => OpenCL}/m10900.cl (91%) rename {amd => OpenCL}/m11000_a0.cl (94%) rename {amd => OpenCL}/m11000_a1.cl (95%) rename {amd => OpenCL}/m11000_a3.cl (90%) rename {amd => OpenCL}/m11100_a0.cl (95%) rename {amd => OpenCL}/m11100_a1.cl (96%) rename {amd => OpenCL}/m11100_a3.cl (92%) rename {amd => OpenCL}/m11200_a0.cl (94%) rename {amd => OpenCL}/m11200_a1.cl (95%) rename {amd => OpenCL}/m11200_a3.cl (93%) rename {amd => OpenCL}/m11300.cl (98%) rename {amd => OpenCL}/m11400_a0.cl (97%) rename {amd => OpenCL}/m11400_a1.cl (97%) rename {amd => OpenCL}/m11400_a3.cl (95%) rename {amd => OpenCL}/m11500_a0.cl (90%) rename {amd => OpenCL}/m11500_a1.cl (91%) rename {amd => OpenCL}/m11500_a3.cl (60%) rename {amd => OpenCL}/m11600.cl (97%) rename {amd => OpenCL}/m11700_a0.cl (98%) rename {amd => OpenCL}/m11700_a1.cl (98%) rename {amd => OpenCL}/m11700_a3.cl (96%) rename {amd => OpenCL}/m11800_a0.cl (98%) rename {amd => OpenCL}/m11800_a1.cl (98%) rename {amd => OpenCL}/m11800_a3.cl (96%) rename {amd => OpenCL}/m11900.cl (90%) rename {amd => OpenCL}/m12000.cl (93%) rename {amd => OpenCL}/m12200.cl (96%) rename {amd => OpenCL}/m12300.cl (97%) rename {amd => OpenCL}/m12400.cl (94%) rename {amd => OpenCL}/m12500.cl (98%) rename {amd => OpenCL}/m12600_a0.cl (94%) rename {amd => OpenCL}/m12600_a1.cl (94%) rename {amd => OpenCL}/m12600_a3.cl (92%) rename {amd => OpenCL}/m12700.cl (97%) rename {amd => OpenCL}/m12800.cl (92%) rename amd/markov_be_v1.cl => OpenCL/markov_be.cl (81%) rename amd/markov_le_v1.cl => OpenCL/markov_le.cl (98%) rename nv/rp_nv.c => OpenCL/rp.c (71%) rename 
amd/types_amd.c => OpenCL/types_ocl.c (59%) delete mode 100644 amd/amp_a0_v2.cl delete mode 100644 amd/amp_a0_v4.cl delete mode 100644 amd/amp_a1_v2.cl delete mode 100644 amd/amp_a1_v4.cl delete mode 100644 amd/amp_a3_v1.cl delete mode 100644 amd/amp_a3_v4.cl delete mode 100644 amd/check_multi_vect1_comp4_warp.c delete mode 100644 amd/check_multi_vect1_comp4_warp_bs.c delete mode 100644 amd/check_multi_vect2_comp4.c delete mode 100644 amd/check_multi_vect2_comp4_warp.c delete mode 100644 amd/check_multi_vect4_comp4.c delete mode 100644 amd/check_multi_vect4_comp4_warp.c delete mode 100644 amd/check_single_vect1_comp4_warp.c delete mode 100644 amd/check_single_vect1_comp4_warp_bs.c delete mode 100644 amd/check_single_vect2_comp4.c delete mode 100644 amd/check_single_vect2_comp4_warp.c delete mode 100644 amd/check_single_vect4_comp4.c delete mode 100644 amd/check_single_vect4_comp4_warp.c delete mode 100644 amd/common_amd.c delete mode 100644 amd/markov_be_v2.cl delete mode 100644 amd/markov_be_v4.cl delete mode 100644 amd/markov_le_v2.cl delete mode 100644 amd/markov_le_v4.cl delete mode 100644 amd/rp_amd.c delete mode 100644 nv/amp_a0_v1.cu delete mode 100644 nv/amp_a0_v2.cu delete mode 100644 nv/amp_a0_v4.cu delete mode 100644 nv/amp_a1_v1.cu delete mode 100644 nv/amp_a1_v2.cu delete mode 100644 nv/amp_a1_v4.cu delete mode 100644 nv/amp_a3_v1.cu delete mode 100644 nv/amp_a3_v2.cu delete mode 100644 nv/amp_a3_v4.cu delete mode 100644 nv/check_multi_vect1_comp4_warp.c delete mode 100644 nv/check_multi_vect1_comp4_warp_bs.c delete mode 100644 nv/check_multi_vect2_comp4.c delete mode 100644 nv/check_multi_vect2_comp4_warp.c delete mode 100644 nv/check_multi_vect4_comp4.c delete mode 100644 nv/check_multi_vect4_comp4_warp.c delete mode 100644 nv/check_single_vect1_comp4.c delete mode 100644 nv/check_single_vect1_comp4_warp.c delete mode 100644 nv/check_single_vect1_comp4_warp_bs.c delete mode 100644 nv/check_single_vect2_comp4.c delete mode 100644 
nv/check_single_vect2_comp4_warp.c delete mode 100644 nv/check_single_vect4_comp4.c delete mode 100644 nv/check_single_vect4_comp4_warp.c delete mode 100644 nv/common_nv.c delete mode 100644 nv/gpu_aes256_nv.c delete mode 100644 nv/gpu_serpent256_nv.c delete mode 100644 nv/gpu_twofish256_nv.c delete mode 100644 nv/m00000_a0.cu delete mode 100644 nv/m00000_a1.cu delete mode 100644 nv/m00000_a3.cu delete mode 100644 nv/m00010_a0.cu delete mode 100644 nv/m00010_a1.cu delete mode 100644 nv/m00010_a3.cu delete mode 100644 nv/m00020_a0.cu delete mode 100644 nv/m00020_a1.cu delete mode 100644 nv/m00020_a3.cu delete mode 100644 nv/m00030_a0.cu delete mode 100644 nv/m00030_a1.cu delete mode 100644 nv/m00030_a3.cu delete mode 100644 nv/m00040_a0.cu delete mode 100644 nv/m00040_a1.cu delete mode 100644 nv/m00040_a3.cu delete mode 100644 nv/m00050_a0.cu delete mode 100644 nv/m00050_a1.cu delete mode 100644 nv/m00050_a3.cu delete mode 100644 nv/m00060_a0.cu delete mode 100644 nv/m00060_a1.cu delete mode 100644 nv/m00060_a3.cu delete mode 100644 nv/m00100_a0.cu delete mode 100644 nv/m00100_a1.cu delete mode 100644 nv/m00100_a3.cu delete mode 100644 nv/m00110_a0.cu delete mode 100644 nv/m00110_a1.cu delete mode 100644 nv/m00110_a3.cu delete mode 100644 nv/m00120_a0.cu delete mode 100644 nv/m00120_a1.cu delete mode 100644 nv/m00120_a3.cu delete mode 100644 nv/m00130_a0.cu delete mode 100644 nv/m00130_a1.cu delete mode 100644 nv/m00130_a3.cu delete mode 100644 nv/m00140_a0.cu delete mode 100644 nv/m00140_a1.cu delete mode 100644 nv/m00140_a3.cu delete mode 100644 nv/m00150_a0.cu delete mode 100644 nv/m00150_a1.cu delete mode 100644 nv/m00150_a3.cu delete mode 100644 nv/m00160_a0.cu delete mode 100644 nv/m00160_a1.cu delete mode 100644 nv/m00160_a3.cu delete mode 100644 nv/m00190_a0.cu delete mode 100644 nv/m00190_a1.cu delete mode 100644 nv/m00190_a3.cu delete mode 100644 nv/m00200_a0.cu delete mode 100644 nv/m00200_a1.cu delete mode 100644 nv/m00200_a3.cu delete mode 100644 
nv/m00300_a0.cu delete mode 100644 nv/m00300_a1.cu delete mode 100644 nv/m00300_a3.cu delete mode 100644 nv/m00400.cu delete mode 100644 nv/m00500.cu delete mode 100644 nv/m00900_a0.cu delete mode 100644 nv/m00900_a1.cu delete mode 100644 nv/m00900_a3.cu delete mode 100644 nv/m01000_a0.cu delete mode 100644 nv/m01000_a1.cu delete mode 100644 nv/m01000_a3.cu delete mode 100644 nv/m01100_a0.cu delete mode 100644 nv/m01100_a1.cu delete mode 100644 nv/m01100_a3.cu delete mode 100644 nv/m01400_a0.cu delete mode 100644 nv/m01400_a1.cu delete mode 100644 nv/m01400_a3.cu delete mode 100644 nv/m01410_a0.cu delete mode 100644 nv/m01410_a1.cu delete mode 100644 nv/m01410_a3.cu delete mode 100644 nv/m01420_a0.cu delete mode 100644 nv/m01420_a1.cu delete mode 100644 nv/m01420_a3.cu delete mode 100644 nv/m01430_a0.cu delete mode 100644 nv/m01430_a1.cu delete mode 100644 nv/m01430_a3.cu delete mode 100644 nv/m01440_a0.cu delete mode 100644 nv/m01440_a1.cu delete mode 100644 nv/m01440_a3.cu delete mode 100644 nv/m01450_a0.cu delete mode 100644 nv/m01450_a1.cu delete mode 100644 nv/m01450_a3.cu delete mode 100644 nv/m01460_a0.cu delete mode 100644 nv/m01460_a1.cu delete mode 100644 nv/m01460_a3.cu delete mode 100644 nv/m01500_a0.cu delete mode 100644 nv/m01500_a1.cu delete mode 100644 nv/m01500_a3.cu delete mode 100644 nv/m01600.cu delete mode 100644 nv/m01700_a0.cu delete mode 100644 nv/m01700_a1.cu delete mode 100644 nv/m01700_a3.cu delete mode 100644 nv/m01710_a0.cu delete mode 100644 nv/m01710_a1.cu delete mode 100644 nv/m01710_a3.cu delete mode 100644 nv/m01720_a0.cu delete mode 100644 nv/m01720_a1.cu delete mode 100644 nv/m01720_a3.cu delete mode 100644 nv/m01730_a0.cu delete mode 100644 nv/m01730_a1.cu delete mode 100644 nv/m01730_a3.cu delete mode 100644 nv/m01740_a0.cu delete mode 100644 nv/m01740_a1.cu delete mode 100644 nv/m01740_a3.cu delete mode 100644 nv/m01750_a0.cu delete mode 100644 nv/m01750_a1.cu delete mode 100644 nv/m01750_a3.cu delete mode 100644 
nv/m01760_a0.cu delete mode 100644 nv/m01760_a1.cu delete mode 100644 nv/m01760_a3.cu delete mode 100644 nv/m01800.cu delete mode 100644 nv/m02100.cu delete mode 100644 nv/m02400_a0.cu delete mode 100644 nv/m02400_a1.cu delete mode 100644 nv/m02400_a3.cu delete mode 100644 nv/m02410_a0.cu delete mode 100644 nv/m02410_a1.cu delete mode 100644 nv/m02410_a3.cu delete mode 100644 nv/m02500.cu delete mode 100644 nv/m02610_a0.cu delete mode 100644 nv/m02610_a1.cu delete mode 100644 nv/m02610_a3.cu delete mode 100644 nv/m02710_a0.cu delete mode 100644 nv/m02710_a1.cu delete mode 100644 nv/m02710_a3.cu delete mode 100644 nv/m02810_a0.cu delete mode 100644 nv/m02810_a1.cu delete mode 100644 nv/m02810_a3.cu delete mode 100644 nv/m03000_a0.cu delete mode 100644 nv/m03000_a1.cu delete mode 100644 nv/m03000_a3.cu delete mode 100644 nv/m03100_a0.cu delete mode 100644 nv/m03100_a1.cu delete mode 100644 nv/m03100_a3.cu delete mode 100644 nv/m03200.cu delete mode 100644 nv/m03710_a0.cu delete mode 100644 nv/m03710_a1.cu delete mode 100644 nv/m03710_a3.cu delete mode 100644 nv/m03800_a0.cu delete mode 100644 nv/m03800_a1.cu delete mode 100644 nv/m03800_a3.cu delete mode 100644 nv/m04310_a0.cu delete mode 100644 nv/m04310_a1.cu delete mode 100644 nv/m04310_a3.cu delete mode 100644 nv/m04400_a0.cu delete mode 100644 nv/m04400_a1.cu delete mode 100644 nv/m04400_a3.cu delete mode 100644 nv/m04500_a0.cu delete mode 100644 nv/m04500_a1.cu delete mode 100644 nv/m04500_a3.cu delete mode 100644 nv/m04700_a0.cu delete mode 100644 nv/m04700_a1.cu delete mode 100644 nv/m04700_a3.cu delete mode 100644 nv/m04800_a0.cu delete mode 100644 nv/m04800_a1.cu delete mode 100644 nv/m04800_a3.cu delete mode 100644 nv/m04900_a0.cu delete mode 100644 nv/m04900_a1.cu delete mode 100644 nv/m04900_a3.cu delete mode 100644 nv/m05000_a0.cu delete mode 100644 nv/m05000_a1.cu delete mode 100644 nv/m05000_a3.cu delete mode 100644 nv/m05100_a0.cu delete mode 100644 nv/m05100_a1.cu delete mode 100644 nv/m05100_a3.cu 
delete mode 100644 nv/m05200.cu delete mode 100644 nv/m05300_a0.cu delete mode 100644 nv/m05300_a1.cu delete mode 100644 nv/m05300_a3.cu delete mode 100644 nv/m05400_a0.cu delete mode 100644 nv/m05400_a1.cu delete mode 100644 nv/m05400_a3.cu delete mode 100644 nv/m05500_a0.cu delete mode 100644 nv/m05500_a1.cu delete mode 100644 nv/m05500_a3.cu delete mode 100644 nv/m05600_a0.cu delete mode 100644 nv/m05600_a1.cu delete mode 100644 nv/m05600_a3.cu delete mode 100644 nv/m05800.cu delete mode 100644 nv/m06000_a0.cu delete mode 100644 nv/m06000_a1.cu delete mode 100644 nv/m06000_a3.cu delete mode 100644 nv/m06100_a0.cu delete mode 100644 nv/m06100_a1.cu delete mode 100644 nv/m06100_a3.cu delete mode 100644 nv/m06211.cu delete mode 100644 nv/m06212.cu delete mode 100644 nv/m06213.cu delete mode 100644 nv/m06221.cu delete mode 100644 nv/m06222.cu delete mode 100644 nv/m06223.cu delete mode 100644 nv/m06231.cu delete mode 100644 nv/m06232.cu delete mode 100644 nv/m06233.cu delete mode 100644 nv/m06300.cu delete mode 100644 nv/m06400.cu delete mode 100644 nv/m06500.cu delete mode 100644 nv/m06600.cu delete mode 100644 nv/m06700.cu delete mode 100644 nv/m06800.cu delete mode 100644 nv/m06900_a0.cu delete mode 100644 nv/m06900_a1.cu delete mode 100644 nv/m06900_a3.cu delete mode 100644 nv/m07100.cu delete mode 100644 nv/m07300_a0.cu delete mode 100644 nv/m07300_a1.cu delete mode 100644 nv/m07300_a3.cu delete mode 100644 nv/m07400.cu delete mode 100644 nv/m07500_a0.cu delete mode 100644 nv/m07500_a1.cu delete mode 100644 nv/m07500_a3.cu delete mode 100644 nv/m07600_a0.cu delete mode 100644 nv/m07600_a1.cu delete mode 100644 nv/m07600_a3.cu delete mode 100644 nv/m07700_a0.cu delete mode 100644 nv/m07700_a1.cu delete mode 100644 nv/m07700_a3.cu delete mode 100644 nv/m07800_a0.cu delete mode 100644 nv/m07800_a1.cu delete mode 100644 nv/m07800_a3.cu delete mode 100644 nv/m07900.cu delete mode 100644 nv/m08000_a0.cu delete mode 100644 nv/m08000_a1.cu delete mode 100644 
nv/m08000_a3.cu delete mode 100644 nv/m08100_a0.cu delete mode 100644 nv/m08100_a1.cu delete mode 100644 nv/m08100_a3.cu delete mode 100644 nv/m08200.cu delete mode 100644 nv/m08300_a0.cu delete mode 100644 nv/m08300_a1.cu delete mode 100644 nv/m08300_a3.cu delete mode 100644 nv/m08400_a0.cu delete mode 100644 nv/m08400_a1.cu delete mode 100644 nv/m08400_a3.cu delete mode 100644 nv/m08500_a0.cu delete mode 100644 nv/m08500_a1.cu delete mode 100644 nv/m08500_a3.cu delete mode 100644 nv/m08600_a0.cu delete mode 100644 nv/m08600_a1.cu delete mode 100644 nv/m08600_a3.cu delete mode 100644 nv/m08700_a0.cu delete mode 100644 nv/m08700_a1.cu delete mode 100644 nv/m08700_a3.cu delete mode 100644 nv/m08800.cu delete mode 100644 nv/m08900.cu delete mode 100644 nv/m09000.cu delete mode 100644 nv/m09100.cu delete mode 100644 nv/m09400.cu delete mode 100644 nv/m09500.cu delete mode 100644 nv/m09600.cu delete mode 100644 nv/m09700_a0.cu delete mode 100644 nv/m09700_a1.cu delete mode 100644 nv/m09700_a3.cu delete mode 100644 nv/m09710_a0.cu delete mode 100644 nv/m09710_a1.cu delete mode 100644 nv/m09710_a3.cu delete mode 100644 nv/m09720_a0.cu delete mode 100644 nv/m09720_a1.cu delete mode 100644 nv/m09720_a3.cu delete mode 100644 nv/m09800_a0.cu delete mode 100644 nv/m09800_a1.cu delete mode 100644 nv/m09800_a3.cu delete mode 100644 nv/m09810_a0.cu delete mode 100644 nv/m09810_a1.cu delete mode 100644 nv/m09810_a3.cu delete mode 100644 nv/m09820_a0.cu delete mode 100644 nv/m09820_a1.cu delete mode 100644 nv/m09820_a3.cu delete mode 100644 nv/m09900_a0.cu delete mode 100644 nv/m09900_a1.cu delete mode 100644 nv/m09900_a3.cu delete mode 100644 nv/m10100_a0.cu delete mode 100644 nv/m10100_a1.cu delete mode 100644 nv/m10100_a3.cu delete mode 100644 nv/m10300.cu delete mode 100644 nv/m10400_a0.cu delete mode 100644 nv/m10400_a1.cu delete mode 100644 nv/m10400_a3.cu delete mode 100644 nv/m10410_a0.cu delete mode 100644 nv/m10410_a1.cu delete mode 100644 nv/m10410_a3.cu delete mode 
100644 nv/m10420_a0.cu delete mode 100644 nv/m10420_a1.cu delete mode 100644 nv/m10420_a3.cu delete mode 100644 nv/m10500.cu delete mode 100644 nv/m10700.cu delete mode 100644 nv/m10800_a0.cu delete mode 100644 nv/m10800_a1.cu delete mode 100644 nv/m10800_a3.cu delete mode 100644 nv/m10900.cu delete mode 100644 nv/m11000_a0.cu delete mode 100644 nv/m11000_a1.cu delete mode 100644 nv/m11000_a3.cu delete mode 100644 nv/m11100_a0.cu delete mode 100644 nv/m11100_a1.cu delete mode 100644 nv/m11100_a3.cu delete mode 100644 nv/m11200_a0.cu delete mode 100644 nv/m11200_a1.cu delete mode 100644 nv/m11200_a3.cu delete mode 100644 nv/m11300.cu delete mode 100644 nv/m11400_a0.cu delete mode 100644 nv/m11400_a1.cu delete mode 100644 nv/m11400_a3.cu delete mode 100644 nv/m11500_a0.cu delete mode 100644 nv/m11500_a1.cu delete mode 100644 nv/m11500_a3.cu delete mode 100644 nv/m11600.cu delete mode 100644 nv/m11700_a0.cu delete mode 100644 nv/m11700_a1.cu delete mode 100644 nv/m11700_a3.cu delete mode 100644 nv/m11800_a0.cu delete mode 100644 nv/m11800_a1.cu delete mode 100644 nv/m11800_a3.cu delete mode 100644 nv/m11900.cu delete mode 100644 nv/m12000.cu delete mode 100644 nv/m12200.cu delete mode 100644 nv/m12300.cu delete mode 100644 nv/m12400.cu delete mode 100644 nv/m12500.cu delete mode 100644 nv/m12600_a0.cu delete mode 100644 nv/m12600_a1.cu delete mode 100644 nv/m12600_a3.cu delete mode 100644 nv/m12700.cu delete mode 100644 nv/m12800.cu delete mode 100644 nv/markov_be_v1.cu delete mode 100644 nv/markov_be_v2.cu delete mode 100644 nv/markov_be_v4.cu delete mode 100644 nv/markov_le_v1.cu delete mode 100644 nv/markov_le_v2.cu delete mode 100644 nv/markov_le_v4.cu delete mode 100644 nv/types_nv.c diff --git a/amd/amp_a0_v1.cl b/OpenCL/amp_a0.cl similarity index 85% rename from amd/amp_a0_v1.cl rename to OpenCL/amp_a0.cl index 9104408..170d1ce 100644 --- a/amd/amp_a0_v1.cl +++ b/OpenCL/amp_a0.cl @@ -3,19 +3,12 @@ * License.....: MIT */ -#define VECT_SIZE1 - #include 
"include/constants.h" #include "include/kernel_vendor.h" -#include "types_amd.c" - -static u32 swap_workaround (const u32 v) -{ - return (as_uint (as_uchar4 (v).s3210)); -} +#include "types_ocl.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) { @@ -25,8 +18,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_ const u32 pw_len = pws[gid].pw_len; - u32x w0[4]; - u32x w1[4]; + u32 w0[4]; + u32 w1[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; diff --git a/amd/amp_a1_v1.cl b/OpenCL/amp_a1.cl similarity index 67% rename from amd/amp_a1_v1.cl rename to OpenCL/amp_a1.cl index 722cb75..b8334fd 100644 --- a/amd/amp_a1_v1.cl +++ b/OpenCL/amp_a1.cl @@ -3,14 +3,13 @@ * License.....: MIT */ -#define VECT_SIZE1 - #include "include/constants.h" #include "include/kernel_vendor.h" -#include "types_amd.c" +#include "types_ocl.c" static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { + #ifdef IS_AMD const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset; @@ -458,6 +457,268 @@ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], break; } + #endif + + #ifdef IS_NV + const int offset_minus_4 = 4 - (offset % 4); + + const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + + switch (offset / 4) + { + case 0: + w3[1] = __byte_perm (w3[0], w3[1], selector); + w3[0] = __byte_perm (w2[3], w3[0], selector); + w2[3] = __byte_perm (w2[2], w2[3], selector); + w2[2] = __byte_perm (w2[1], w2[2], selector); + w2[1] = __byte_perm (w2[0], w2[1], selector); + w2[0] = __byte_perm (w1[3], w2[0], selector); + w1[3] = __byte_perm (w1[2], w1[3], selector); + w1[2] = __byte_perm (w1[1], w1[2], selector); + w1[1] = 
__byte_perm (w1[0], w1[1], selector); + w1[0] = __byte_perm (w0[3], w1[0], selector); + w0[3] = __byte_perm (w0[2], w0[3], selector); + w0[2] = __byte_perm (w0[1], w0[2], selector); + w0[1] = __byte_perm (w0[0], w0[1], selector); + w0[0] = __byte_perm ( 0, w0[0], selector); + + break; + + case 1: + w3[1] = __byte_perm (w2[3], w3[0], selector); + w3[0] = __byte_perm (w2[2], w2[3], selector); + w2[3] = __byte_perm (w2[1], w2[2], selector); + w2[2] = __byte_perm (w2[0], w2[1], selector); + w2[1] = __byte_perm (w1[3], w2[0], selector); + w2[0] = __byte_perm (w1[2], w1[3], selector); + w1[3] = __byte_perm (w1[1], w1[2], selector); + w1[2] = __byte_perm (w1[0], w1[1], selector); + w1[1] = __byte_perm (w0[3], w1[0], selector); + w1[0] = __byte_perm (w0[2], w0[3], selector); + w0[3] = __byte_perm (w0[1], w0[2], selector); + w0[2] = __byte_perm (w0[0], w0[1], selector); + w0[1] = __byte_perm ( 0, w0[0], selector); + w0[0] = 0; + + break; + + case 2: + w3[1] = __byte_perm (w2[2], w2[3], selector); + w3[0] = __byte_perm (w2[1], w2[2], selector); + w2[3] = __byte_perm (w2[0], w2[1], selector); + w2[2] = __byte_perm (w1[3], w2[0], selector); + w2[1] = __byte_perm (w1[2], w1[3], selector); + w2[0] = __byte_perm (w1[1], w1[2], selector); + w1[3] = __byte_perm (w1[0], w1[1], selector); + w1[2] = __byte_perm (w0[3], w1[0], selector); + w1[1] = __byte_perm (w0[2], w0[3], selector); + w1[0] = __byte_perm (w0[1], w0[2], selector); + w0[3] = __byte_perm (w0[0], w0[1], selector); + w0[2] = __byte_perm ( 0, w0[0], selector); + w0[1] = 0; + w0[0] = 0; + + break; + + case 3: + w3[1] = __byte_perm (w2[1], w2[2], selector); + w3[0] = __byte_perm (w2[0], w2[1], selector); + w2[3] = __byte_perm (w1[3], w2[0], selector); + w2[2] = __byte_perm (w1[2], w1[3], selector); + w2[1] = __byte_perm (w1[1], w1[2], selector); + w2[0] = __byte_perm (w1[0], w1[1], selector); + w1[3] = __byte_perm (w0[3], w1[0], selector); + w1[2] = __byte_perm (w0[2], w0[3], selector); + w1[1] = __byte_perm (w0[1], w0[2], 
selector); + w1[0] = __byte_perm (w0[0], w0[1], selector); + w0[3] = __byte_perm ( 0, w0[0], selector); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 4: + w3[1] = __byte_perm (w2[0], w2[1], selector); + w3[0] = __byte_perm (w1[3], w2[0], selector); + w2[3] = __byte_perm (w1[2], w1[3], selector); + w2[2] = __byte_perm (w1[1], w1[2], selector); + w2[1] = __byte_perm (w1[0], w1[1], selector); + w2[0] = __byte_perm (w0[3], w1[0], selector); + w1[3] = __byte_perm (w0[2], w0[3], selector); + w1[2] = __byte_perm (w0[1], w0[2], selector); + w1[1] = __byte_perm (w0[0], w0[1], selector); + w1[0] = __byte_perm ( 0, w0[0], selector); + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 5: + w3[1] = __byte_perm (w1[3], w2[0], selector); + w3[0] = __byte_perm (w1[2], w1[3], selector); + w2[3] = __byte_perm (w1[1], w1[2], selector); + w2[2] = __byte_perm (w1[0], w1[1], selector); + w2[1] = __byte_perm (w0[3], w1[0], selector); + w2[0] = __byte_perm (w0[2], w0[3], selector); + w1[3] = __byte_perm (w0[1], w0[2], selector); + w1[2] = __byte_perm (w0[0], w0[1], selector); + w1[1] = __byte_perm ( 0, w0[0], selector); + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 6: + w3[1] = __byte_perm (w1[2], w1[3], selector); + w3[0] = __byte_perm (w1[1], w1[2], selector); + w2[3] = __byte_perm (w1[0], w1[1], selector); + w2[2] = __byte_perm (w0[3], w1[0], selector); + w2[1] = __byte_perm (w0[2], w0[3], selector); + w2[0] = __byte_perm (w0[1], w0[2], selector); + w1[3] = __byte_perm (w0[0], w0[1], selector); + w1[2] = __byte_perm ( 0, w0[0], selector); + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 7: + w3[1] = __byte_perm (w1[1], w1[2], selector); + w3[0] = __byte_perm (w1[0], w1[1], selector); + w2[3] = __byte_perm (w0[3], w1[0], selector); + w2[2] = __byte_perm (w0[2], w0[3], selector); + w2[1] = __byte_perm (w0[1], w0[2], selector); + w2[0] = __byte_perm (w0[0], w0[1], 
selector); + w1[3] = __byte_perm ( 0, w0[0], selector); + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 8: + w3[1] = __byte_perm (w1[0], w1[1], selector); + w3[0] = __byte_perm (w0[3], w1[0], selector); + w2[3] = __byte_perm (w0[2], w0[3], selector); + w2[2] = __byte_perm (w0[1], w0[2], selector); + w2[1] = __byte_perm (w0[0], w0[1], selector); + w2[0] = __byte_perm ( 0, w0[0], selector); + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 9: + w3[1] = __byte_perm (w0[3], w1[0], selector); + w3[0] = __byte_perm (w0[2], w0[3], selector); + w2[3] = __byte_perm (w0[1], w0[2], selector); + w2[2] = __byte_perm (w0[0], w0[1], selector); + w2[1] = __byte_perm ( 0, w0[0], selector); + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 10: + w3[1] = __byte_perm (w0[2], w0[3], selector); + w3[0] = __byte_perm (w0[1], w0[2], selector); + w2[3] = __byte_perm (w0[0], w0[1], selector); + w2[2] = __byte_perm ( 0, w0[0], selector); + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 11: + w3[1] = __byte_perm (w0[1], w0[2], selector); + w3[0] = __byte_perm (w0[0], w0[1], selector); + w2[3] = __byte_perm ( 0, w0[0], selector); + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 12: + w3[1] = __byte_perm (w0[0], w0[1], selector); + w3[0] = __byte_perm ( 0, w0[0], selector); + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 13: + w3[1] = __byte_perm ( 0, w0[0], selector); + w3[0] = 0; + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + 
w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + } + #endif } __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) diff --git a/amd/amp_a3_v2.cl b/OpenCL/amp_a3.cl similarity index 93% rename from amd/amp_a3_v2.cl rename to OpenCL/amp_a3.cl index 0e11f87..85ec6ce 100644 --- a/amd/amp_a3_v2.cl +++ b/OpenCL/amp_a3.cl @@ -3,11 +3,9 @@ * License.....: MIT */ -#define VECT_SIZE2 - #include "include/constants.h" #include "include/kernel_vendor.h" -#include "types_amd.c" +#include "types_ocl.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) { @@ -17,10 +15,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_ const u32 pw_len = pws[gid].pw_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; diff --git a/amd/check_multi_vect1_comp4.c b/OpenCL/check_multi_comp4.c similarity index 90% rename from amd/check_multi_vect1_comp4.c rename to OpenCL/check_multi_comp4.c index b1eadac..5d51803 100644 --- a/amd/check_multi_vect1_comp4.c +++ b/OpenCL/check_multi_comp4.c @@ -26,7 +26,7 @@ if (check (digest_tp, if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); + mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); d_return_buf[lid] = 1; } diff --git a/nv/check_multi_vect1_comp4.c b/OpenCL/check_multi_comp4_bs.c similarity index 82% rename from nv/check_multi_vect1_comp4.c rename to OpenCL/check_multi_comp4_bs.c index 
39772fc..d08471a 100644 --- a/nv/check_multi_vect1_comp4.c +++ b/OpenCL/check_multi_comp4_bs.c @@ -24,9 +24,9 @@ if (check (digest_tp, { const u32 final_hash_pos = digests_offset + hash_pos; - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) + if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); + mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + slice); d_return_buf[lid] = 1; } diff --git a/amd/check_single_vect1_comp4.c b/OpenCL/check_single_comp4.c similarity index 76% rename from amd/check_single_vect1_comp4.c rename to OpenCL/check_single_comp4.c index bc7c51f..0478658 100644 --- a/amd/check_single_vect1_comp4.c +++ b/OpenCL/check_single_comp4.c @@ -7,7 +7,7 @@ if ((r0 == search[0]) if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); + mark_hash (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); d_return_buf[lid] = 1; } diff --git a/OpenCL/check_single_comp4_bs.c b/OpenCL/check_single_comp4_bs.c new file mode 100644 index 0000000..a98a3f9 --- /dev/null +++ b/OpenCL/check_single_comp4_bs.c @@ -0,0 +1,3 @@ +mark_hash (plains_buf, hashes_shown, 0, gid, il_pos + slice); + +d_return_buf[lid] = 1; diff --git a/OpenCL/common.c b/OpenCL/common.c new file mode 100644 index 0000000..23580ab --- /dev/null +++ b/OpenCL/common.c @@ -0,0 +1,7980 @@ +/** + * Author......: Jens Steube + * License.....: MIT + */ + +static int device_memcmp (const u32 d1[4], __global u32 *d2) +{ + if (d1[3] > d2[DGST_R3]) return ( 1); + if (d1[3] < d2[DGST_R3]) return (-1); + if (d1[2] > d2[DGST_R2]) return ( 1); + if (d1[2] < d2[DGST_R2]) return (-1); + if (d1[1] > d2[DGST_R1]) return ( 1); + if (d1[1] < d2[DGST_R1]) return (-1); + if (d1[0] > d2[DGST_R0]) return ( 1); + if (d1[0] < d2[DGST_R0]) return (-1); + + return (0); +} + +static int find_hash (const u32 digest[4], const u32 digests_cnt, __global digest_t 
*digests_buf) +{ + for (u32 l = 0, r = digests_cnt; r; r >>= 1) + { + const u32 m = r >> 1; + + const u32 c = l + m; + + const int cmp = device_memcmp (digest, digests_buf[c].digest_buf); + + if (cmp > 0) + { + l += m + 1; + + r--; + } + + if (cmp == 0) return (c); + } + + return (-1); +} + +static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest) +{ + return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f))); +} + +static u32 check (const u32 digest[2], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2) +{ + if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0); + if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0); + if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0); + if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0); + + if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0); + if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0); + if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0); + if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0); + + return (1); +} + +static void mark_hash (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) +{ + hashes_shown[hash_pos] = 1; + + plains_buf[hash_pos].gidvid = (gid * 1) + 0; + plains_buf[hash_pos].il_pos = il_pos; +} + +static void truncate_block (u32 w[4], const u32 len) +{ + switch (len) + { + case 0: w[0] &= 0; + w[1] &= 0; + w[2] &= 0; + w[3] &= 0; + break; + case 1: w[0] &= 
0x000000FF; + w[1] &= 0; + w[2] &= 0; + w[3] &= 0; + break; + case 2: w[0] &= 0x0000FFFF; + w[1] &= 0; + w[2] &= 0; + w[3] &= 0; + break; + case 3: w[0] &= 0x00FFFFFF; + w[1] &= 0; + w[2] &= 0; + w[3] &= 0; + break; + case 4: w[1] &= 0; + w[2] &= 0; + w[3] &= 0; + break; + case 5: w[1] &= 0x000000FF; + w[2] &= 0; + w[3] &= 0; + break; + case 6: w[1] &= 0x0000FFFF; + w[2] &= 0; + w[3] &= 0; + break; + case 7: w[1] &= 0x00FFFFFF; + w[2] &= 0; + w[3] &= 0; + break; + case 8: w[2] &= 0; + w[3] &= 0; + break; + case 9: w[2] &= 0x000000FF; + w[3] &= 0; + break; + case 10: w[2] &= 0x0000FFFF; + w[3] &= 0; + break; + case 11: w[2] &= 0x00FFFFFF; + w[3] &= 0; + break; + case 12: w[3] &= 0; + break; + case 13: w[3] &= 0x000000FF; + break; + case 14: w[3] &= 0x0000FFFF; + break; + case 15: w[3] &= 0x00FFFFFF; + break; + } +} + +static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4]) +{ + #ifdef IS_NV + out2[3] = __byte_perm (in[3], 0, 0x7372); + out2[2] = __byte_perm (in[3], 0, 0x7170); + out2[1] = __byte_perm (in[2], 0, 0x7372); + out2[0] = __byte_perm (in[2], 0, 0x7170); + out1[3] = __byte_perm (in[1], 0, 0x7372); + out1[2] = __byte_perm (in[1], 0, 0x7170); + out1[1] = __byte_perm (in[0], 0, 0x7372); + out1[0] = __byte_perm (in[0], 0, 0x7170); + #endif + + #ifdef IS_AMD + out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF); + out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF); + out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF); + out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF); + out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF); + out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF); + out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF); + out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF); + #endif +} + +static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4]) +{ + #ifdef IS_NV + out[0] = __byte_perm 
(in1[0], in1[1], 0x6420); + out[1] = __byte_perm (in1[2], in1[3], 0x6420); + out[2] = __byte_perm (in2[0], in2[1], 0x6420); + out[3] = __byte_perm (in2[2], in2[3], 0x6420); + #endif + + #ifdef IS_AMD + out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8) + | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8); + out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8) + | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8); + out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8) + | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8); + out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8) + | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8); + #endif +} + +// before: append_0x01_1 +static void append_0x01_1x4 (u32 w0[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x01; + break; + + case 1: + w0[0] = w0[0] | 0x0100; + break; + + case 2: + w0[0] = w0[0] | 0x010000; + break; + + case 3: + w0[0] = w0[0] | 0x01000000; + break; + + case 4: + w0[1] = 0x01; + break; + + case 5: + w0[1] = w0[1] | 0x0100; + break; + + case 6: + w0[1] = w0[1] | 0x010000; + break; + + case 7: + w0[1] = w0[1] | 0x01000000; + break; + + case 8: + w0[2] = 0x01; + break; + + case 9: + w0[2] = w0[2] | 0x0100; + break; + + case 10: + w0[2] = w0[2] | 0x010000; + break; + + case 11: + w0[2] = w0[2] | 0x01000000; + break; + + case 12: + w0[3] = 0x01; + break; + + case 13: + w0[3] = w0[3] | 0x0100; + break; + + case 14: + w0[3] = w0[3] | 0x010000; + break; + + case 15: + w0[3] = w0[3] | 0x01000000; + break; + } +} + +// before: append_0x01_2 +static void append_0x01_2x4 (u32 w0[4], u32 w1[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x01; + break; + + case 1: + w0[0] = w0[0] | 0x0100; + break; + + case 2: + w0[0] = w0[0] | 0x010000; + break; + + case 3: + w0[0] = w0[0] | 0x01000000; + break; + + case 4: + w0[1] = 0x01; + break; + + case 5: + w0[1] = w0[1] | 0x0100; 
+ break; + + case 6: + w0[1] = w0[1] | 0x010000; + break; + + case 7: + w0[1] = w0[1] | 0x01000000; + break; + + case 8: + w0[2] = 0x01; + break; + + case 9: + w0[2] = w0[2] | 0x0100; + break; + + case 10: + w0[2] = w0[2] | 0x010000; + break; + + case 11: + w0[2] = w0[2] | 0x01000000; + break; + + case 12: + w0[3] = 0x01; + break; + + case 13: + w0[3] = w0[3] | 0x0100; + break; + + case 14: + w0[3] = w0[3] | 0x010000; + break; + + case 15: + w0[3] = w0[3] | 0x01000000; + break; + + case 16: + w1[0] = 0x01; + break; + + case 17: + w1[0] = w1[0] | 0x0100; + break; + + case 18: + w1[0] = w1[0] | 0x010000; + break; + + case 19: + w1[0] = w1[0] | 0x01000000; + break; + + case 20: + w1[1] = 0x01; + break; + + case 21: + w1[1] = w1[1] | 0x0100; + break; + + case 22: + w1[1] = w1[1] | 0x010000; + break; + + case 23: + w1[1] = w1[1] | 0x01000000; + break; + + case 24: + w1[2] = 0x01; + break; + + case 25: + w1[2] = w1[2] | 0x0100; + break; + + case 26: + w1[2] = w1[2] | 0x010000; + break; + + case 27: + w1[2] = w1[2] | 0x01000000; + break; + + case 28: + w1[3] = 0x01; + break; + + case 29: + w1[3] = w1[3] | 0x0100; + break; + + case 30: + w1[3] = w1[3] | 0x010000; + break; + + case 31: + w1[3] = w1[3] | 0x01000000; + break; + } +} + +// before: append_0x01_3 +static void append_0x01_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x01; + break; + + case 1: + w0[0] = w0[0] | 0x0100; + break; + + case 2: + w0[0] = w0[0] | 0x010000; + break; + + case 3: + w0[0] = w0[0] | 0x01000000; + break; + + case 4: + w0[1] = 0x01; + break; + + case 5: + w0[1] = w0[1] | 0x0100; + break; + + case 6: + w0[1] = w0[1] | 0x010000; + break; + + case 7: + w0[1] = w0[1] | 0x01000000; + break; + + case 8: + w0[2] = 0x01; + break; + + case 9: + w0[2] = w0[2] | 0x0100; + break; + + case 10: + w0[2] = w0[2] | 0x010000; + break; + + case 11: + w0[2] = w0[2] | 0x01000000; + break; + + case 12: + w0[3] = 0x01; + break; + + case 13: + w0[3] = w0[3] | 
0x0100; + break; + + case 14: + w0[3] = w0[3] | 0x010000; + break; + + case 15: + w0[3] = w0[3] | 0x01000000; + break; + + case 16: + w1[0] = 0x01; + break; + + case 17: + w1[0] = w1[0] | 0x0100; + break; + + case 18: + w1[0] = w1[0] | 0x010000; + break; + + case 19: + w1[0] = w1[0] | 0x01000000; + break; + + case 20: + w1[1] = 0x01; + break; + + case 21: + w1[1] = w1[1] | 0x0100; + break; + + case 22: + w1[1] = w1[1] | 0x010000; + break; + + case 23: + w1[1] = w1[1] | 0x01000000; + break; + + case 24: + w1[2] = 0x01; + break; + + case 25: + w1[2] = w1[2] | 0x0100; + break; + + case 26: + w1[2] = w1[2] | 0x010000; + break; + + case 27: + w1[2] = w1[2] | 0x01000000; + break; + + case 28: + w1[3] = 0x01; + break; + + case 29: + w1[3] = w1[3] | 0x0100; + break; + + case 30: + w1[3] = w1[3] | 0x010000; + break; + + case 31: + w1[3] = w1[3] | 0x01000000; + break; + + case 32: + w2[0] = 0x01; + break; + + case 33: + w2[0] = w2[0] | 0x0100; + break; + + case 34: + w2[0] = w2[0] | 0x010000; + break; + + case 35: + w2[0] = w2[0] | 0x01000000; + break; + + case 36: + w2[1] = 0x01; + break; + + case 37: + w2[1] = w2[1] | 0x0100; + break; + + case 38: + w2[1] = w2[1] | 0x010000; + break; + + case 39: + w2[1] = w2[1] | 0x01000000; + break; + + case 40: + w2[2] = 0x01; + break; + + case 41: + w2[2] = w2[2] | 0x0100; + break; + + case 42: + w2[2] = w2[2] | 0x010000; + break; + + case 43: + w2[2] = w2[2] | 0x01000000; + break; + + case 44: + w2[3] = 0x01; + break; + + case 45: + w2[3] = w2[3] | 0x0100; + break; + + case 46: + w2[3] = w2[3] | 0x010000; + break; + + case 47: + w2[3] = w2[3] | 0x01000000; + break; + } +} + +// before: append_0x01_4 +static void append_0x01_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x01; + break; + + case 1: + w0[0] = w0[0] | 0x0100; + break; + + case 2: + w0[0] = w0[0] | 0x010000; + break; + + case 3: + w0[0] = w0[0] | 0x01000000; + break; + + case 4: + w0[1] = 0x01; + break; + + 
case 5: + w0[1] = w0[1] | 0x0100; + break; + + case 6: + w0[1] = w0[1] | 0x010000; + break; + + case 7: + w0[1] = w0[1] | 0x01000000; + break; + + case 8: + w0[2] = 0x01; + break; + + case 9: + w0[2] = w0[2] | 0x0100; + break; + + case 10: + w0[2] = w0[2] | 0x010000; + break; + + case 11: + w0[2] = w0[2] | 0x01000000; + break; + + case 12: + w0[3] = 0x01; + break; + + case 13: + w0[3] = w0[3] | 0x0100; + break; + + case 14: + w0[3] = w0[3] | 0x010000; + break; + + case 15: + w0[3] = w0[3] | 0x01000000; + break; + + case 16: + w1[0] = 0x01; + break; + + case 17: + w1[0] = w1[0] | 0x0100; + break; + + case 18: + w1[0] = w1[0] | 0x010000; + break; + + case 19: + w1[0] = w1[0] | 0x01000000; + break; + + case 20: + w1[1] = 0x01; + break; + + case 21: + w1[1] = w1[1] | 0x0100; + break; + + case 22: + w1[1] = w1[1] | 0x010000; + break; + + case 23: + w1[1] = w1[1] | 0x01000000; + break; + + case 24: + w1[2] = 0x01; + break; + + case 25: + w1[2] = w1[2] | 0x0100; + break; + + case 26: + w1[2] = w1[2] | 0x010000; + break; + + case 27: + w1[2] = w1[2] | 0x01000000; + break; + + case 28: + w1[3] = 0x01; + break; + + case 29: + w1[3] = w1[3] | 0x0100; + break; + + case 30: + w1[3] = w1[3] | 0x010000; + break; + + case 31: + w1[3] = w1[3] | 0x01000000; + break; + + case 32: + w2[0] = 0x01; + break; + + case 33: + w2[0] = w2[0] | 0x0100; + break; + + case 34: + w2[0] = w2[0] | 0x010000; + break; + + case 35: + w2[0] = w2[0] | 0x01000000; + break; + + case 36: + w2[1] = 0x01; + break; + + case 37: + w2[1] = w2[1] | 0x0100; + break; + + case 38: + w2[1] = w2[1] | 0x010000; + break; + + case 39: + w2[1] = w2[1] | 0x01000000; + break; + + case 40: + w2[2] = 0x01; + break; + + case 41: + w2[2] = w2[2] | 0x0100; + break; + + case 42: + w2[2] = w2[2] | 0x010000; + break; + + case 43: + w2[2] = w2[2] | 0x01000000; + break; + + case 44: + w2[3] = 0x01; + break; + + case 45: + w2[3] = w2[3] | 0x0100; + break; + + case 46: + w2[3] = w2[3] | 0x010000; + break; + + case 47: + w2[3] = w2[3] | 
0x01000000; + break; + + case 48: + w3[0] = 0x01; + break; + + case 49: + w3[0] = w3[0] | 0x0100; + break; + + case 50: + w3[0] = w3[0] | 0x010000; + break; + + case 51: + w3[0] = w3[0] | 0x01000000; + break; + + case 52: + w3[1] = 0x01; + break; + + case 53: + w3[1] = w3[1] | 0x0100; + break; + + case 54: + w3[1] = w3[1] | 0x010000; + break; + + case 55: + w3[1] = w3[1] | 0x01000000; + break; + + case 56: + w3[2] = 0x01; + break; + + case 57: + w3[2] = w3[2] | 0x0100; + break; + + case 58: + w3[2] = w3[2] | 0x010000; + break; + + case 59: + w3[2] = w3[2] | 0x01000000; + break; + + case 60: + w3[3] = 0x01; + break; + + case 61: + w3[3] = w3[3] | 0x0100; + break; + + case 62: + w3[3] = w3[3] | 0x010000; + break; + + case 63: + w3[3] = w3[3] | 0x01000000; + break; + } +} + +// before: append_0x01_8 +static void append_0x01_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x01; + break; + + case 1: + w0[0] = w0[0] | 0x0100; + break; + + case 2: + w0[0] = w0[0] | 0x010000; + break; + + case 3: + w0[0] = w0[0] | 0x01000000; + break; + + case 4: + w0[1] = 0x01; + break; + + case 5: + w0[1] = w0[1] | 0x0100; + break; + + case 6: + w0[1] = w0[1] | 0x010000; + break; + + case 7: + w0[1] = w0[1] | 0x01000000; + break; + + case 8: + w0[2] = 0x01; + break; + + case 9: + w0[2] = w0[2] | 0x0100; + break; + + case 10: + w0[2] = w0[2] | 0x010000; + break; + + case 11: + w0[2] = w0[2] | 0x01000000; + break; + + case 12: + w0[3] = 0x01; + break; + + case 13: + w0[3] = w0[3] | 0x0100; + break; + + case 14: + w0[3] = w0[3] | 0x010000; + break; + + case 15: + w0[3] = w0[3] | 0x01000000; + break; + + case 16: + w1[0] = 0x01; + break; + + case 17: + w1[0] = w1[0] | 0x0100; + break; + + case 18: + w1[0] = w1[0] | 0x010000; + break; + + case 19: + w1[0] = w1[0] | 0x01000000; + break; + + case 20: + w1[1] = 0x01; + break; + + case 21: + w1[1] = w1[1] | 0x0100; + break; + + case 22: + 
w1[1] = w1[1] | 0x010000; + break; + + case 23: + w1[1] = w1[1] | 0x01000000; + break; + + case 24: + w1[2] = 0x01; + break; + + case 25: + w1[2] = w1[2] | 0x0100; + break; + + case 26: + w1[2] = w1[2] | 0x010000; + break; + + case 27: + w1[2] = w1[2] | 0x01000000; + break; + + case 28: + w1[3] = 0x01; + break; + + case 29: + w1[3] = w1[3] | 0x0100; + break; + + case 30: + w1[3] = w1[3] | 0x010000; + break; + + case 31: + w1[3] = w1[3] | 0x01000000; + break; + + case 32: + w2[0] = 0x01; + break; + + case 33: + w2[0] = w2[0] | 0x0100; + break; + + case 34: + w2[0] = w2[0] | 0x010000; + break; + + case 35: + w2[0] = w2[0] | 0x01000000; + break; + + case 36: + w2[1] = 0x01; + break; + + case 37: + w2[1] = w2[1] | 0x0100; + break; + + case 38: + w2[1] = w2[1] | 0x010000; + break; + + case 39: + w2[1] = w2[1] | 0x01000000; + break; + + case 40: + w2[2] = 0x01; + break; + + case 41: + w2[2] = w2[2] | 0x0100; + break; + + case 42: + w2[2] = w2[2] | 0x010000; + break; + + case 43: + w2[2] = w2[2] | 0x01000000; + break; + + case 44: + w2[3] = 0x01; + break; + + case 45: + w2[3] = w2[3] | 0x0100; + break; + + case 46: + w2[3] = w2[3] | 0x010000; + break; + + case 47: + w2[3] = w2[3] | 0x01000000; + break; + + case 48: + w3[0] = 0x01; + break; + + case 49: + w3[0] = w3[0] | 0x0100; + break; + + case 50: + w3[0] = w3[0] | 0x010000; + break; + + case 51: + w3[0] = w3[0] | 0x01000000; + break; + + case 52: + w3[1] = 0x01; + break; + + case 53: + w3[1] = w3[1] | 0x0100; + break; + + case 54: + w3[1] = w3[1] | 0x010000; + break; + + case 55: + w3[1] = w3[1] | 0x01000000; + break; + + case 56: + w3[2] = 0x01; + break; + + case 57: + w3[2] = w3[2] | 0x0100; + break; + + case 58: + w3[2] = w3[2] | 0x010000; + break; + + case 59: + w3[2] = w3[2] | 0x01000000; + break; + + case 60: + w3[3] = 0x01; + break; + + case 61: + w3[3] = w3[3] | 0x0100; + break; + + case 62: + w3[3] = w3[3] | 0x010000; + break; + + case 63: + w3[3] = w3[3] | 0x01000000; + break; + + case 64: + w4[0] = 0x01; + 
break; + + case 65: + w4[0] = w4[0] | 0x0100; + break; + + case 66: + w4[0] = w4[0] | 0x010000; + break; + + case 67: + w4[0] = w4[0] | 0x01000000; + break; + + case 68: + w4[1] = 0x01; + break; + + case 69: + w4[1] = w4[1] | 0x0100; + break; + + case 70: + w4[1] = w4[1] | 0x010000; + break; + + case 71: + w4[1] = w4[1] | 0x01000000; + break; + + case 72: + w4[2] = 0x01; + break; + + case 73: + w4[2] = w4[2] | 0x0100; + break; + + case 74: + w4[2] = w4[2] | 0x010000; + break; + + case 75: + w4[2] = w4[2] | 0x01000000; + break; + + case 76: + w4[3] = 0x01; + break; + + case 77: + w4[3] = w4[3] | 0x0100; + break; + + case 78: + w4[3] = w4[3] | 0x010000; + break; + + case 79: + w4[3] = w4[3] | 0x01000000; + break; + + case 80: + w5[0] = 0x01; + break; + + case 81: + w5[0] = w5[0] | 0x0100; + break; + + case 82: + w5[0] = w5[0] | 0x010000; + break; + + case 83: + w5[0] = w5[0] | 0x01000000; + break; + + case 84: + w5[1] = 0x01; + break; + + case 85: + w5[1] = w5[1] | 0x0100; + break; + + case 86: + w5[1] = w5[1] | 0x010000; + break; + + case 87: + w5[1] = w5[1] | 0x01000000; + break; + + case 88: + w5[2] = 0x01; + break; + + case 89: + w5[2] = w5[2] | 0x0100; + break; + + case 90: + w5[2] = w5[2] | 0x010000; + break; + + case 91: + w5[2] = w5[2] | 0x01000000; + break; + + case 92: + w5[3] = 0x01; + break; + + case 93: + w5[3] = w5[3] | 0x0100; + break; + + case 94: + w5[3] = w5[3] | 0x010000; + break; + + case 95: + w5[3] = w5[3] | 0x01000000; + break; + + case 96: + w6[0] = 0x01; + break; + + case 97: + w6[0] = w6[0] | 0x0100; + break; + + case 98: + w6[0] = w6[0] | 0x010000; + break; + + case 99: + w6[0] = w6[0] | 0x01000000; + break; + + case 100: + w6[1] = 0x01; + break; + + case 101: + w6[1] = w6[1] | 0x0100; + break; + + case 102: + w6[1] = w6[1] | 0x010000; + break; + + case 103: + w6[1] = w6[1] | 0x01000000; + break; + + case 104: + w6[2] = 0x01; + break; + + case 105: + w6[2] = w6[2] | 0x0100; + break; + + case 106: + w6[2] = w6[2] | 0x010000; + break; + + 
case 107: + w6[2] = w6[2] | 0x01000000; + break; + + case 108: + w6[3] = 0x01; + break; + + case 109: + w6[3] = w6[3] | 0x0100; + break; + + case 110: + w6[3] = w6[3] | 0x010000; + break; + + case 111: + w6[3] = w6[3] | 0x01000000; + break; + + case 112: + w7[0] = 0x01; + break; + + case 113: + w7[0] = w7[0] | 0x0100; + break; + + case 114: + w7[0] = w7[0] | 0x010000; + break; + + case 115: + w7[0] = w7[0] | 0x01000000; + break; + + case 116: + w7[1] = 0x01; + break; + + case 117: + w7[1] = w7[1] | 0x0100; + break; + + case 118: + w7[1] = w7[1] | 0x010000; + break; + + case 119: + w7[1] = w7[1] | 0x01000000; + break; + + case 120: + w7[2] = 0x01; + break; + + case 121: + w7[2] = w7[2] | 0x0100; + break; + + case 122: + w7[2] = w7[2] | 0x010000; + break; + + case 123: + w7[2] = w7[2] | 0x01000000; + break; + + case 124: + w7[3] = 0x01; + break; + + case 125: + w7[3] = w7[3] | 0x0100; + break; + + case 126: + w7[3] = w7[3] | 0x010000; + break; + + case 127: + w7[3] = w7[3] | 0x01000000; + break; + } +} + +// before: append_0x02_1 +static void append_0x02_1x4 (u32 w0[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x02; + break; + + case 1: + w0[0] = w0[0] | 0x0200; + break; + + case 2: + w0[0] = w0[0] | 0x020000; + break; + + case 3: + w0[0] = w0[0] | 0x02000000; + break; + + case 4: + w0[1] = 0x02; + break; + + case 5: + w0[1] = w0[1] | 0x0200; + break; + + case 6: + w0[1] = w0[1] | 0x020000; + break; + + case 7: + w0[1] = w0[1] | 0x02000000; + break; + + case 8: + w0[2] = 0x02; + break; + + case 9: + w0[2] = w0[2] | 0x0200; + break; + + case 10: + w0[2] = w0[2] | 0x020000; + break; + + case 11: + w0[2] = w0[2] | 0x02000000; + break; + + case 12: + w0[3] = 0x02; + break; + + case 13: + w0[3] = w0[3] | 0x0200; + break; + + case 14: + w0[3] = w0[3] | 0x020000; + break; + + case 15: + w0[3] = w0[3] | 0x02000000; + break; + } +} + +// before: append_0x02_2 +static void append_0x02_2x4 (u32 w0[4], u32 w1[4], const u32 offset) +{ + switch (offset) + { + 
case 0: + w0[0] = 0x02; + break; + + case 1: + w0[0] = w0[0] | 0x0200; + break; + + case 2: + w0[0] = w0[0] | 0x020000; + break; + + case 3: + w0[0] = w0[0] | 0x02000000; + break; + + case 4: + w0[1] = 0x02; + break; + + case 5: + w0[1] = w0[1] | 0x0200; + break; + + case 6: + w0[1] = w0[1] | 0x020000; + break; + + case 7: + w0[1] = w0[1] | 0x02000000; + break; + + case 8: + w0[2] = 0x02; + break; + + case 9: + w0[2] = w0[2] | 0x0200; + break; + + case 10: + w0[2] = w0[2] | 0x020000; + break; + + case 11: + w0[2] = w0[2] | 0x02000000; + break; + + case 12: + w0[3] = 0x02; + break; + + case 13: + w0[3] = w0[3] | 0x0200; + break; + + case 14: + w0[3] = w0[3] | 0x020000; + break; + + case 15: + w0[3] = w0[3] | 0x02000000; + break; + + case 16: + w1[0] = 0x02; + break; + + case 17: + w1[0] = w1[0] | 0x0200; + break; + + case 18: + w1[0] = w1[0] | 0x020000; + break; + + case 19: + w1[0] = w1[0] | 0x02000000; + break; + + case 20: + w1[1] = 0x02; + break; + + case 21: + w1[1] = w1[1] | 0x0200; + break; + + case 22: + w1[1] = w1[1] | 0x020000; + break; + + case 23: + w1[1] = w1[1] | 0x02000000; + break; + + case 24: + w1[2] = 0x02; + break; + + case 25: + w1[2] = w1[2] | 0x0200; + break; + + case 26: + w1[2] = w1[2] | 0x020000; + break; + + case 27: + w1[2] = w1[2] | 0x02000000; + break; + + case 28: + w1[3] = 0x02; + break; + + case 29: + w1[3] = w1[3] | 0x0200; + break; + + case 30: + w1[3] = w1[3] | 0x020000; + break; + + case 31: + w1[3] = w1[3] | 0x02000000; + break; + } +} + +// before: append_0x02_3 +static void append_0x02_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x02; + break; + + case 1: + w0[0] = w0[0] | 0x0200; + break; + + case 2: + w0[0] = w0[0] | 0x020000; + break; + + case 3: + w0[0] = w0[0] | 0x02000000; + break; + + case 4: + w0[1] = 0x02; + break; + + case 5: + w0[1] = w0[1] | 0x0200; + break; + + case 6: + w0[1] = w0[1] | 0x020000; + break; + + case 7: + w0[1] = w0[1] | 0x02000000; + break; + 
+ case 8: + w0[2] = 0x02; + break; + + case 9: + w0[2] = w0[2] | 0x0200; + break; + + case 10: + w0[2] = w0[2] | 0x020000; + break; + + case 11: + w0[2] = w0[2] | 0x02000000; + break; + + case 12: + w0[3] = 0x02; + break; + + case 13: + w0[3] = w0[3] | 0x0200; + break; + + case 14: + w0[3] = w0[3] | 0x020000; + break; + + case 15: + w0[3] = w0[3] | 0x02000000; + break; + + case 16: + w1[0] = 0x02; + break; + + case 17: + w1[0] = w1[0] | 0x0200; + break; + + case 18: + w1[0] = w1[0] | 0x020000; + break; + + case 19: + w1[0] = w1[0] | 0x02000000; + break; + + case 20: + w1[1] = 0x02; + break; + + case 21: + w1[1] = w1[1] | 0x0200; + break; + + case 22: + w1[1] = w1[1] | 0x020000; + break; + + case 23: + w1[1] = w1[1] | 0x02000000; + break; + + case 24: + w1[2] = 0x02; + break; + + case 25: + w1[2] = w1[2] | 0x0200; + break; + + case 26: + w1[2] = w1[2] | 0x020000; + break; + + case 27: + w1[2] = w1[2] | 0x02000000; + break; + + case 28: + w1[3] = 0x02; + break; + + case 29: + w1[3] = w1[3] | 0x0200; + break; + + case 30: + w1[3] = w1[3] | 0x020000; + break; + + case 31: + w1[3] = w1[3] | 0x02000000; + break; + + case 32: + w2[0] = 0x02; + break; + + case 33: + w2[0] = w2[0] | 0x0200; + break; + + case 34: + w2[0] = w2[0] | 0x020000; + break; + + case 35: + w2[0] = w2[0] | 0x02000000; + break; + + case 36: + w2[1] = 0x02; + break; + + case 37: + w2[1] = w2[1] | 0x0200; + break; + + case 38: + w2[1] = w2[1] | 0x020000; + break; + + case 39: + w2[1] = w2[1] | 0x02000000; + break; + + case 40: + w2[2] = 0x02; + break; + + case 41: + w2[2] = w2[2] | 0x0200; + break; + + case 42: + w2[2] = w2[2] | 0x020000; + break; + + case 43: + w2[2] = w2[2] | 0x02000000; + break; + + case 44: + w2[3] = 0x02; + break; + + case 45: + w2[3] = w2[3] | 0x0200; + break; + + case 46: + w2[3] = w2[3] | 0x020000; + break; + + case 47: + w2[3] = w2[3] | 0x02000000; + break; + } +} + +// before: append_0x02_4 +static void append_0x02_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 
offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x02; + break; + + case 1: + w0[0] = w0[0] | 0x0200; + break; + + case 2: + w0[0] = w0[0] | 0x020000; + break; + + case 3: + w0[0] = w0[0] | 0x02000000; + break; + + case 4: + w0[1] = 0x02; + break; + + case 5: + w0[1] = w0[1] | 0x0200; + break; + + case 6: + w0[1] = w0[1] | 0x020000; + break; + + case 7: + w0[1] = w0[1] | 0x02000000; + break; + + case 8: + w0[2] = 0x02; + break; + + case 9: + w0[2] = w0[2] | 0x0200; + break; + + case 10: + w0[2] = w0[2] | 0x020000; + break; + + case 11: + w0[2] = w0[2] | 0x02000000; + break; + + case 12: + w0[3] = 0x02; + break; + + case 13: + w0[3] = w0[3] | 0x0200; + break; + + case 14: + w0[3] = w0[3] | 0x020000; + break; + + case 15: + w0[3] = w0[3] | 0x02000000; + break; + + case 16: + w1[0] = 0x02; + break; + + case 17: + w1[0] = w1[0] | 0x0200; + break; + + case 18: + w1[0] = w1[0] | 0x020000; + break; + + case 19: + w1[0] = w1[0] | 0x02000000; + break; + + case 20: + w1[1] = 0x02; + break; + + case 21: + w1[1] = w1[1] | 0x0200; + break; + + case 22: + w1[1] = w1[1] | 0x020000; + break; + + case 23: + w1[1] = w1[1] | 0x02000000; + break; + + case 24: + w1[2] = 0x02; + break; + + case 25: + w1[2] = w1[2] | 0x0200; + break; + + case 26: + w1[2] = w1[2] | 0x020000; + break; + + case 27: + w1[2] = w1[2] | 0x02000000; + break; + + case 28: + w1[3] = 0x02; + break; + + case 29: + w1[3] = w1[3] | 0x0200; + break; + + case 30: + w1[3] = w1[3] | 0x020000; + break; + + case 31: + w1[3] = w1[3] | 0x02000000; + break; + + case 32: + w2[0] = 0x02; + break; + + case 33: + w2[0] = w2[0] | 0x0200; + break; + + case 34: + w2[0] = w2[0] | 0x020000; + break; + + case 35: + w2[0] = w2[0] | 0x02000000; + break; + + case 36: + w2[1] = 0x02; + break; + + case 37: + w2[1] = w2[1] | 0x0200; + break; + + case 38: + w2[1] = w2[1] | 0x020000; + break; + + case 39: + w2[1] = w2[1] | 0x02000000; + break; + + case 40: + w2[2] = 0x02; + break; + + case 41: + w2[2] = w2[2] | 0x0200; + break; + + case 42: 
+ w2[2] = w2[2] | 0x020000; + break; + + case 43: + w2[2] = w2[2] | 0x02000000; + break; + + case 44: + w2[3] = 0x02; + break; + + case 45: + w2[3] = w2[3] | 0x0200; + break; + + case 46: + w2[3] = w2[3] | 0x020000; + break; + + case 47: + w2[3] = w2[3] | 0x02000000; + break; + + case 48: + w3[0] = 0x02; + break; + + case 49: + w3[0] = w3[0] | 0x0200; + break; + + case 50: + w3[0] = w3[0] | 0x020000; + break; + + case 51: + w3[0] = w3[0] | 0x02000000; + break; + + case 52: + w3[1] = 0x02; + break; + + case 53: + w3[1] = w3[1] | 0x0200; + break; + + case 54: + w3[1] = w3[1] | 0x020000; + break; + + case 55: + w3[1] = w3[1] | 0x02000000; + break; + + case 56: + w3[2] = 0x02; + break; + + case 57: + w3[2] = w3[2] | 0x0200; + break; + + case 58: + w3[2] = w3[2] | 0x020000; + break; + + case 59: + w3[2] = w3[2] | 0x02000000; + break; + + case 60: + w3[3] = 0x02; + break; + + case 61: + w3[3] = w3[3] | 0x0200; + break; + + case 62: + w3[3] = w3[3] | 0x020000; + break; + + case 63: + w3[3] = w3[3] | 0x02000000; + break; + } +} + +// before: append_0x02_8 +static void append_0x02_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x02; + break; + + case 1: + w0[0] = w0[0] | 0x0200; + break; + + case 2: + w0[0] = w0[0] | 0x020000; + break; + + case 3: + w0[0] = w0[0] | 0x02000000; + break; + + case 4: + w0[1] = 0x02; + break; + + case 5: + w0[1] = w0[1] | 0x0200; + break; + + case 6: + w0[1] = w0[1] | 0x020000; + break; + + case 7: + w0[1] = w0[1] | 0x02000000; + break; + + case 8: + w0[2] = 0x02; + break; + + case 9: + w0[2] = w0[2] | 0x0200; + break; + + case 10: + w0[2] = w0[2] | 0x020000; + break; + + case 11: + w0[2] = w0[2] | 0x02000000; + break; + + case 12: + w0[3] = 0x02; + break; + + case 13: + w0[3] = w0[3] | 0x0200; + break; + + case 14: + w0[3] = w0[3] | 0x020000; + break; + + case 15: + w0[3] = w0[3] | 0x02000000; + break; + + case 16: + w1[0] = 0x02; + 
break; + + case 17: + w1[0] = w1[0] | 0x0200; + break; + + case 18: + w1[0] = w1[0] | 0x020000; + break; + + case 19: + w1[0] = w1[0] | 0x02000000; + break; + + case 20: + w1[1] = 0x02; + break; + + case 21: + w1[1] = w1[1] | 0x0200; + break; + + case 22: + w1[1] = w1[1] | 0x020000; + break; + + case 23: + w1[1] = w1[1] | 0x02000000; + break; + + case 24: + w1[2] = 0x02; + break; + + case 25: + w1[2] = w1[2] | 0x0200; + break; + + case 26: + w1[2] = w1[2] | 0x020000; + break; + + case 27: + w1[2] = w1[2] | 0x02000000; + break; + + case 28: + w1[3] = 0x02; + break; + + case 29: + w1[3] = w1[3] | 0x0200; + break; + + case 30: + w1[3] = w1[3] | 0x020000; + break; + + case 31: + w1[3] = w1[3] | 0x02000000; + break; + + case 32: + w2[0] = 0x02; + break; + + case 33: + w2[0] = w2[0] | 0x0200; + break; + + case 34: + w2[0] = w2[0] | 0x020000; + break; + + case 35: + w2[0] = w2[0] | 0x02000000; + break; + + case 36: + w2[1] = 0x02; + break; + + case 37: + w2[1] = w2[1] | 0x0200; + break; + + case 38: + w2[1] = w2[1] | 0x020000; + break; + + case 39: + w2[1] = w2[1] | 0x02000000; + break; + + case 40: + w2[2] = 0x02; + break; + + case 41: + w2[2] = w2[2] | 0x0200; + break; + + case 42: + w2[2] = w2[2] | 0x020000; + break; + + case 43: + w2[2] = w2[2] | 0x02000000; + break; + + case 44: + w2[3] = 0x02; + break; + + case 45: + w2[3] = w2[3] | 0x0200; + break; + + case 46: + w2[3] = w2[3] | 0x020000; + break; + + case 47: + w2[3] = w2[3] | 0x02000000; + break; + + case 48: + w3[0] = 0x02; + break; + + case 49: + w3[0] = w3[0] | 0x0200; + break; + + case 50: + w3[0] = w3[0] | 0x020000; + break; + + case 51: + w3[0] = w3[0] | 0x02000000; + break; + + case 52: + w3[1] = 0x02; + break; + + case 53: + w3[1] = w3[1] | 0x0200; + break; + + case 54: + w3[1] = w3[1] | 0x020000; + break; + + case 55: + w3[1] = w3[1] | 0x02000000; + break; + + case 56: + w3[2] = 0x02; + break; + + case 57: + w3[2] = w3[2] | 0x0200; + break; + + case 58: + w3[2] = w3[2] | 0x020000; + break; + + case 59: + 
w3[2] = w3[2] | 0x02000000; + break; + + case 60: + w3[3] = 0x02; + break; + + case 61: + w3[3] = w3[3] | 0x0200; + break; + + case 62: + w3[3] = w3[3] | 0x020000; + break; + + case 63: + w3[3] = w3[3] | 0x02000000; + break; + + case 64: + w4[0] = 0x02; + break; + + case 65: + w4[0] = w4[0] | 0x0200; + break; + + case 66: + w4[0] = w4[0] | 0x020000; + break; + + case 67: + w4[0] = w4[0] | 0x02000000; + break; + + case 68: + w4[1] = 0x02; + break; + + case 69: + w4[1] = w4[1] | 0x0200; + break; + + case 70: + w4[1] = w4[1] | 0x020000; + break; + + case 71: + w4[1] = w4[1] | 0x02000000; + break; + + case 72: + w4[2] = 0x02; + break; + + case 73: + w4[2] = w4[2] | 0x0200; + break; + + case 74: + w4[2] = w4[2] | 0x020000; + break; + + case 75: + w4[2] = w4[2] | 0x02000000; + break; + + case 76: + w4[3] = 0x02; + break; + + case 77: + w4[3] = w4[3] | 0x0200; + break; + + case 78: + w4[3] = w4[3] | 0x020000; + break; + + case 79: + w4[3] = w4[3] | 0x02000000; + break; + + case 80: + w5[0] = 0x02; + break; + + case 81: + w5[0] = w5[0] | 0x0200; + break; + + case 82: + w5[0] = w5[0] | 0x020000; + break; + + case 83: + w5[0] = w5[0] | 0x02000000; + break; + + case 84: + w5[1] = 0x02; + break; + + case 85: + w5[1] = w5[1] | 0x0200; + break; + + case 86: + w5[1] = w5[1] | 0x020000; + break; + + case 87: + w5[1] = w5[1] | 0x02000000; + break; + + case 88: + w5[2] = 0x02; + break; + + case 89: + w5[2] = w5[2] | 0x0200; + break; + + case 90: + w5[2] = w5[2] | 0x020000; + break; + + case 91: + w5[2] = w5[2] | 0x02000000; + break; + + case 92: + w5[3] = 0x02; + break; + + case 93: + w5[3] = w5[3] | 0x0200; + break; + + case 94: + w5[3] = w5[3] | 0x020000; + break; + + case 95: + w5[3] = w5[3] | 0x02000000; + break; + + case 96: + w6[0] = 0x02; + break; + + case 97: + w6[0] = w6[0] | 0x0200; + break; + + case 98: + w6[0] = w6[0] | 0x020000; + break; + + case 99: + w6[0] = w6[0] | 0x02000000; + break; + + case 100: + w6[1] = 0x02; + break; + + case 101: + w6[1] = w6[1] | 0x0200; + 
break; + + case 102: + w6[1] = w6[1] | 0x020000; + break; + + case 103: + w6[1] = w6[1] | 0x02000000; + break; + + case 104: + w6[2] = 0x02; + break; + + case 105: + w6[2] = w6[2] | 0x0200; + break; + + case 106: + w6[2] = w6[2] | 0x020000; + break; + + case 107: + w6[2] = w6[2] | 0x02000000; + break; + + case 108: + w6[3] = 0x02; + break; + + case 109: + w6[3] = w6[3] | 0x0200; + break; + + case 110: + w6[3] = w6[3] | 0x020000; + break; + + case 111: + w6[3] = w6[3] | 0x02000000; + break; + + case 112: + w7[0] = 0x02; + break; + + case 113: + w7[0] = w7[0] | 0x0200; + break; + + case 114: + w7[0] = w7[0] | 0x020000; + break; + + case 115: + w7[0] = w7[0] | 0x02000000; + break; + + case 116: + w7[1] = 0x02; + break; + + case 117: + w7[1] = w7[1] | 0x0200; + break; + + case 118: + w7[1] = w7[1] | 0x020000; + break; + + case 119: + w7[1] = w7[1] | 0x02000000; + break; + + case 120: + w7[2] = 0x02; + break; + + case 121: + w7[2] = w7[2] | 0x0200; + break; + + case 122: + w7[2] = w7[2] | 0x020000; + break; + + case 123: + w7[2] = w7[2] | 0x02000000; + break; + + case 124: + w7[3] = 0x02; + break; + + case 125: + w7[3] = w7[3] | 0x0200; + break; + + case 126: + w7[3] = w7[3] | 0x020000; + break; + + case 127: + w7[3] = w7[3] | 0x02000000; + break; + } +} + +// before: append_0x80_1 +static void append_0x80_1x4 (u32 w0[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x80; + break; + + case 1: + w0[0] = w0[0] | 0x8000; + break; + + case 2: + w0[0] = w0[0] | 0x800000; + break; + + case 3: + w0[0] = w0[0] | 0x80000000; + break; + + case 4: + w0[1] = 0x80; + break; + + case 5: + w0[1] = w0[1] | 0x8000; + break; + + case 6: + w0[1] = w0[1] | 0x800000; + break; + + case 7: + w0[1] = w0[1] | 0x80000000; + break; + + case 8: + w0[2] = 0x80; + break; + + case 9: + w0[2] = w0[2] | 0x8000; + break; + + case 10: + w0[2] = w0[2] | 0x800000; + break; + + case 11: + w0[2] = w0[2] | 0x80000000; + break; + + case 12: + w0[3] = 0x80; + break; + + case 13: + w0[3] = 
w0[3] | 0x8000; + break; + + case 14: + w0[3] = w0[3] | 0x800000; + break; + + case 15: + w0[3] = w0[3] | 0x80000000; + break; + } +} + +// before: append_0x80_2 +static void append_0x80_2x4 (u32 w0[4], u32 w1[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x80; + break; + + case 1: + w0[0] = w0[0] | 0x8000; + break; + + case 2: + w0[0] = w0[0] | 0x800000; + break; + + case 3: + w0[0] = w0[0] | 0x80000000; + break; + + case 4: + w0[1] = 0x80; + break; + + case 5: + w0[1] = w0[1] | 0x8000; + break; + + case 6: + w0[1] = w0[1] | 0x800000; + break; + + case 7: + w0[1] = w0[1] | 0x80000000; + break; + + case 8: + w0[2] = 0x80; + break; + + case 9: + w0[2] = w0[2] | 0x8000; + break; + + case 10: + w0[2] = w0[2] | 0x800000; + break; + + case 11: + w0[2] = w0[2] | 0x80000000; + break; + + case 12: + w0[3] = 0x80; + break; + + case 13: + w0[3] = w0[3] | 0x8000; + break; + + case 14: + w0[3] = w0[3] | 0x800000; + break; + + case 15: + w0[3] = w0[3] | 0x80000000; + break; + + case 16: + w1[0] = 0x80; + break; + + case 17: + w1[0] = w1[0] | 0x8000; + break; + + case 18: + w1[0] = w1[0] | 0x800000; + break; + + case 19: + w1[0] = w1[0] | 0x80000000; + break; + + case 20: + w1[1] = 0x80; + break; + + case 21: + w1[1] = w1[1] | 0x8000; + break; + + case 22: + w1[1] = w1[1] | 0x800000; + break; + + case 23: + w1[1] = w1[1] | 0x80000000; + break; + + case 24: + w1[2] = 0x80; + break; + + case 25: + w1[2] = w1[2] | 0x8000; + break; + + case 26: + w1[2] = w1[2] | 0x800000; + break; + + case 27: + w1[2] = w1[2] | 0x80000000; + break; + + case 28: + w1[3] = 0x80; + break; + + case 29: + w1[3] = w1[3] | 0x8000; + break; + + case 30: + w1[3] = w1[3] | 0x800000; + break; + + case 31: + w1[3] = w1[3] | 0x80000000; + break; + } +} + +// before: append_0x80_2_be +static void append_0x80_2x4_be (u32 w0[4], u32 w1[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] |= 0x80000000; + break; + + case 1: + w0[0] |= 0x800000; + break; + + case 2: + w0[0] |= 0x8000; 
+ break; + + case 3: + w0[0] |= 0x80; + break; + + case 4: + w0[1] |= 0x80000000; + break; + + case 5: + w0[1] |= 0x800000; + break; + + case 6: + w0[1] |= 0x8000; + break; + + case 7: + w0[1] |= 0x80; + break; + + case 8: + w0[2] |= 0x80000000; + break; + + case 9: + w0[2] |= 0x800000; + break; + + case 10: + w0[2] |= 0x8000; + break; + + case 11: + w0[2] |= 0x80; + break; + + case 12: + w0[3] |= 0x80000000; + break; + + case 13: + w0[3] |= 0x800000; + break; + + case 14: + w0[3] |= 0x8000; + break; + + case 15: + w0[3] |= 0x80; + break; + + case 16: + w1[0] |= 0x80000000; + break; + + case 17: + w1[0] |= 0x800000; + break; + + case 18: + w1[0] |= 0x8000; + break; + + case 19: + w1[0] |= 0x80; + break; + + case 20: + w1[1] |= 0x80000000; + break; + + case 21: + w1[1] |= 0x800000; + break; + + case 22: + w1[1] |= 0x8000; + break; + + case 23: + w1[1] |= 0x80; + break; + + case 24: + w1[2] |= 0x80000000; + break; + + case 25: + w1[2] |= 0x800000; + break; + + case 26: + w1[2] |= 0x8000; + break; + + case 27: + w1[2] |= 0x80; + break; + + case 28: + w1[3] |= 0x80000000; + break; + + case 29: + w1[3] |= 0x800000; + break; + + case 30: + w1[3] |= 0x8000; + break; + + case 31: + w1[3] |= 0x80; + break; + } +} + +// before: append_0x80_3 +static void append_0x80_3x4 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x80; + break; + + case 1: + w0[0] = w0[0] | 0x8000; + break; + + case 2: + w0[0] = w0[0] | 0x800000; + break; + + case 3: + w0[0] = w0[0] | 0x80000000; + break; + + case 4: + w0[1] = 0x80; + break; + + case 5: + w0[1] = w0[1] | 0x8000; + break; + + case 6: + w0[1] = w0[1] | 0x800000; + break; + + case 7: + w0[1] = w0[1] | 0x80000000; + break; + + case 8: + w0[2] = 0x80; + break; + + case 9: + w0[2] = w0[2] | 0x8000; + break; + + case 10: + w0[2] = w0[2] | 0x800000; + break; + + case 11: + w0[2] = w0[2] | 0x80000000; + break; + + case 12: + w0[3] = 0x80; + break; + + case 13: + w0[3] = w0[3] | 0x8000; + break; + + 
case 14: + w0[3] = w0[3] | 0x800000; + break; + + case 15: + w0[3] = w0[3] | 0x80000000; + break; + + case 16: + w1[0] = 0x80; + break; + + case 17: + w1[0] = w1[0] | 0x8000; + break; + + case 18: + w1[0] = w1[0] | 0x800000; + break; + + case 19: + w1[0] = w1[0] | 0x80000000; + break; + + case 20: + w1[1] = 0x80; + break; + + case 21: + w1[1] = w1[1] | 0x8000; + break; + + case 22: + w1[1] = w1[1] | 0x800000; + break; + + case 23: + w1[1] = w1[1] | 0x80000000; + break; + + case 24: + w1[2] = 0x80; + break; + + case 25: + w1[2] = w1[2] | 0x8000; + break; + + case 26: + w1[2] = w1[2] | 0x800000; + break; + + case 27: + w1[2] = w1[2] | 0x80000000; + break; + + case 28: + w1[3] = 0x80; + break; + + case 29: + w1[3] = w1[3] | 0x8000; + break; + + case 30: + w1[3] = w1[3] | 0x800000; + break; + + case 31: + w1[3] = w1[3] | 0x80000000; + break; + + case 32: + w2[0] = 0x80; + break; + + case 33: + w2[0] = w2[0] | 0x8000; + break; + + case 34: + w2[0] = w2[0] | 0x800000; + break; + + case 35: + w2[0] = w2[0] | 0x80000000; + break; + + case 36: + w2[1] = 0x80; + break; + + case 37: + w2[1] = w2[1] | 0x8000; + break; + + case 38: + w2[1] = w2[1] | 0x800000; + break; + + case 39: + w2[1] = w2[1] | 0x80000000; + break; + + case 40: + w2[2] = 0x80; + break; + + case 41: + w2[2] = w2[2] | 0x8000; + break; + + case 42: + w2[2] = w2[2] | 0x800000; + break; + + case 43: + w2[2] = w2[2] | 0x80000000; + break; + + case 44: + w2[3] = 0x80; + break; + + case 45: + w2[3] = w2[3] | 0x8000; + break; + + case 46: + w2[3] = w2[3] | 0x800000; + break; + + case 47: + w2[3] = w2[3] | 0x80000000; + break; + } +} + +// before: append_0x80_4 +static void append_0x80_4x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x80; + break; + + case 1: + w0[0] = w0[0] | 0x8000; + break; + + case 2: + w0[0] = w0[0] | 0x800000; + break; + + case 3: + w0[0] = w0[0] | 0x80000000; + break; + + case 4: + w0[1] = 0x80; + break; + + case 5: + w0[1] = 
w0[1] | 0x8000; + break; + + case 6: + w0[1] = w0[1] | 0x800000; + break; + + case 7: + w0[1] = w0[1] | 0x80000000; + break; + + case 8: + w0[2] = 0x80; + break; + + case 9: + w0[2] = w0[2] | 0x8000; + break; + + case 10: + w0[2] = w0[2] | 0x800000; + break; + + case 11: + w0[2] = w0[2] | 0x80000000; + break; + + case 12: + w0[3] = 0x80; + break; + + case 13: + w0[3] = w0[3] | 0x8000; + break; + + case 14: + w0[3] = w0[3] | 0x800000; + break; + + case 15: + w0[3] = w0[3] | 0x80000000; + break; + + case 16: + w1[0] = 0x80; + break; + + case 17: + w1[0] = w1[0] | 0x8000; + break; + + case 18: + w1[0] = w1[0] | 0x800000; + break; + + case 19: + w1[0] = w1[0] | 0x80000000; + break; + + case 20: + w1[1] = 0x80; + break; + + case 21: + w1[1] = w1[1] | 0x8000; + break; + + case 22: + w1[1] = w1[1] | 0x800000; + break; + + case 23: + w1[1] = w1[1] | 0x80000000; + break; + + case 24: + w1[2] = 0x80; + break; + + case 25: + w1[2] = w1[2] | 0x8000; + break; + + case 26: + w1[2] = w1[2] | 0x800000; + break; + + case 27: + w1[2] = w1[2] | 0x80000000; + break; + + case 28: + w1[3] = 0x80; + break; + + case 29: + w1[3] = w1[3] | 0x8000; + break; + + case 30: + w1[3] = w1[3] | 0x800000; + break; + + case 31: + w1[3] = w1[3] | 0x80000000; + break; + + case 32: + w2[0] = 0x80; + break; + + case 33: + w2[0] = w2[0] | 0x8000; + break; + + case 34: + w2[0] = w2[0] | 0x800000; + break; + + case 35: + w2[0] = w2[0] | 0x80000000; + break; + + case 36: + w2[1] = 0x80; + break; + + case 37: + w2[1] = w2[1] | 0x8000; + break; + + case 38: + w2[1] = w2[1] | 0x800000; + break; + + case 39: + w2[1] = w2[1] | 0x80000000; + break; + + case 40: + w2[2] = 0x80; + break; + + case 41: + w2[2] = w2[2] | 0x8000; + break; + + case 42: + w2[2] = w2[2] | 0x800000; + break; + + case 43: + w2[2] = w2[2] | 0x80000000; + break; + + case 44: + w2[3] = 0x80; + break; + + case 45: + w2[3] = w2[3] | 0x8000; + break; + + case 46: + w2[3] = w2[3] | 0x800000; + break; + + case 47: + w2[3] = w2[3] | 0x80000000; + 
break; + + case 48: + w3[0] = 0x80; + break; + + case 49: + w3[0] = w3[0] | 0x8000; + break; + + case 50: + w3[0] = w3[0] | 0x800000; + break; + + case 51: + w3[0] = w3[0] | 0x80000000; + break; + + case 52: + w3[1] = 0x80; + break; + + case 53: + w3[1] = w3[1] | 0x8000; + break; + + case 54: + w3[1] = w3[1] | 0x800000; + break; + + case 55: + w3[1] = w3[1] | 0x80000000; + break; + + case 56: + w3[2] = 0x80; + break; + + case 57: + w3[2] = w3[2] | 0x8000; + break; + + case 58: + w3[2] = w3[2] | 0x800000; + break; + + case 59: + w3[2] = w3[2] | 0x80000000; + break; + + case 60: + w3[3] = 0x80; + break; + + case 61: + w3[3] = w3[3] | 0x8000; + break; + + case 62: + w3[3] = w3[3] | 0x800000; + break; + + case 63: + w3[3] = w3[3] | 0x80000000; + break; + } +} + +// before: append_0x80_8 +static void append_0x80_8x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = 0x80; + break; + + case 1: + w0[0] = w0[0] | 0x8000; + break; + + case 2: + w0[0] = w0[0] | 0x800000; + break; + + case 3: + w0[0] = w0[0] | 0x80000000; + break; + + case 4: + w0[1] = 0x80; + break; + + case 5: + w0[1] = w0[1] | 0x8000; + break; + + case 6: + w0[1] = w0[1] | 0x800000; + break; + + case 7: + w0[1] = w0[1] | 0x80000000; + break; + + case 8: + w0[2] = 0x80; + break; + + case 9: + w0[2] = w0[2] | 0x8000; + break; + + case 10: + w0[2] = w0[2] | 0x800000; + break; + + case 11: + w0[2] = w0[2] | 0x80000000; + break; + + case 12: + w0[3] = 0x80; + break; + + case 13: + w0[3] = w0[3] | 0x8000; + break; + + case 14: + w0[3] = w0[3] | 0x800000; + break; + + case 15: + w0[3] = w0[3] | 0x80000000; + break; + + case 16: + w1[0] = 0x80; + break; + + case 17: + w1[0] = w1[0] | 0x8000; + break; + + case 18: + w1[0] = w1[0] | 0x800000; + break; + + case 19: + w1[0] = w1[0] | 0x80000000; + break; + + case 20: + w1[1] = 0x80; + break; + + case 21: + w1[1] = w1[1] | 0x8000; + break; + + case 22: + w1[1] = w1[1] | 
0x800000; + break; + + case 23: + w1[1] = w1[1] | 0x80000000; + break; + + case 24: + w1[2] = 0x80; + break; + + case 25: + w1[2] = w1[2] | 0x8000; + break; + + case 26: + w1[2] = w1[2] | 0x800000; + break; + + case 27: + w1[2] = w1[2] | 0x80000000; + break; + + case 28: + w1[3] = 0x80; + break; + + case 29: + w1[3] = w1[3] | 0x8000; + break; + + case 30: + w1[3] = w1[3] | 0x800000; + break; + + case 31: + w1[3] = w1[3] | 0x80000000; + break; + + case 32: + w2[0] = 0x80; + break; + + case 33: + w2[0] = w2[0] | 0x8000; + break; + + case 34: + w2[0] = w2[0] | 0x800000; + break; + + case 35: + w2[0] = w2[0] | 0x80000000; + break; + + case 36: + w2[1] = 0x80; + break; + + case 37: + w2[1] = w2[1] | 0x8000; + break; + + case 38: + w2[1] = w2[1] | 0x800000; + break; + + case 39: + w2[1] = w2[1] | 0x80000000; + break; + + case 40: + w2[2] = 0x80; + break; + + case 41: + w2[2] = w2[2] | 0x8000; + break; + + case 42: + w2[2] = w2[2] | 0x800000; + break; + + case 43: + w2[2] = w2[2] | 0x80000000; + break; + + case 44: + w2[3] = 0x80; + break; + + case 45: + w2[3] = w2[3] | 0x8000; + break; + + case 46: + w2[3] = w2[3] | 0x800000; + break; + + case 47: + w2[3] = w2[3] | 0x80000000; + break; + + case 48: + w3[0] = 0x80; + break; + + case 49: + w3[0] = w3[0] | 0x8000; + break; + + case 50: + w3[0] = w3[0] | 0x800000; + break; + + case 51: + w3[0] = w3[0] | 0x80000000; + break; + + case 52: + w3[1] = 0x80; + break; + + case 53: + w3[1] = w3[1] | 0x8000; + break; + + case 54: + w3[1] = w3[1] | 0x800000; + break; + + case 55: + w3[1] = w3[1] | 0x80000000; + break; + + case 56: + w3[2] = 0x80; + break; + + case 57: + w3[2] = w3[2] | 0x8000; + break; + + case 58: + w3[2] = w3[2] | 0x800000; + break; + + case 59: + w3[2] = w3[2] | 0x80000000; + break; + + case 60: + w3[3] = 0x80; + break; + + case 61: + w3[3] = w3[3] | 0x8000; + break; + + case 62: + w3[3] = w3[3] | 0x800000; + break; + + case 63: + w3[3] = w3[3] | 0x80000000; + break; + + case 64: + w4[0] = 0x80; + break; + + case 
65: + w4[0] = w4[0] | 0x8000; + break; + + case 66: + w4[0] = w4[0] | 0x800000; + break; + + case 67: + w4[0] = w4[0] | 0x80000000; + break; + + case 68: + w4[1] = 0x80; + break; + + case 69: + w4[1] = w4[1] | 0x8000; + break; + + case 70: + w4[1] = w4[1] | 0x800000; + break; + + case 71: + w4[1] = w4[1] | 0x80000000; + break; + + case 72: + w4[2] = 0x80; + break; + + case 73: + w4[2] = w4[2] | 0x8000; + break; + + case 74: + w4[2] = w4[2] | 0x800000; + break; + + case 75: + w4[2] = w4[2] | 0x80000000; + break; + + case 76: + w4[3] = 0x80; + break; + + case 77: + w4[3] = w4[3] | 0x8000; + break; + + case 78: + w4[3] = w4[3] | 0x800000; + break; + + case 79: + w4[3] = w4[3] | 0x80000000; + break; + + case 80: + w5[0] = 0x80; + break; + + case 81: + w5[0] = w5[0] | 0x8000; + break; + + case 82: + w5[0] = w5[0] | 0x800000; + break; + + case 83: + w5[0] = w5[0] | 0x80000000; + break; + + case 84: + w5[1] = 0x80; + break; + + case 85: + w5[1] = w5[1] | 0x8000; + break; + + case 86: + w5[1] = w5[1] | 0x800000; + break; + + case 87: + w5[1] = w5[1] | 0x80000000; + break; + + case 88: + w5[2] = 0x80; + break; + + case 89: + w5[2] = w5[2] | 0x8000; + break; + + case 90: + w5[2] = w5[2] | 0x800000; + break; + + case 91: + w5[2] = w5[2] | 0x80000000; + break; + + case 92: + w5[3] = 0x80; + break; + + case 93: + w5[3] = w5[3] | 0x8000; + break; + + case 94: + w5[3] = w5[3] | 0x800000; + break; + + case 95: + w5[3] = w5[3] | 0x80000000; + break; + + case 96: + w6[0] = 0x80; + break; + + case 97: + w6[0] = w6[0] | 0x8000; + break; + + case 98: + w6[0] = w6[0] | 0x800000; + break; + + case 99: + w6[0] = w6[0] | 0x80000000; + break; + + case 100: + w6[1] = 0x80; + break; + + case 101: + w6[1] = w6[1] | 0x8000; + break; + + case 102: + w6[1] = w6[1] | 0x800000; + break; + + case 103: + w6[1] = w6[1] | 0x80000000; + break; + + case 104: + w6[2] = 0x80; + break; + + case 105: + w6[2] = w6[2] | 0x8000; + break; + + case 106: + w6[2] = w6[2] | 0x800000; + break; + + case 107: + w6[2] = 
w6[2] | 0x80000000; + break; + + case 108: + w6[3] = 0x80; + break; + + case 109: + w6[3] = w6[3] | 0x8000; + break; + + case 110: + w6[3] = w6[3] | 0x800000; + break; + + case 111: + w6[3] = w6[3] | 0x80000000; + break; + + case 112: + w7[0] = 0x80; + break; + + case 113: + w7[0] = w7[0] | 0x8000; + break; + + case 114: + w7[0] = w7[0] | 0x800000; + break; + + case 115: + w7[0] = w7[0] | 0x80000000; + break; + + case 116: + w7[1] = 0x80; + break; + + case 117: + w7[1] = w7[1] | 0x8000; + break; + + case 118: + w7[1] = w7[1] | 0x800000; + break; + + case 119: + w7[1] = w7[1] | 0x80000000; + break; + + case 120: + w7[2] = 0x80; + break; + + case 121: + w7[2] = w7[2] | 0x8000; + break; + + case 122: + w7[2] = w7[2] | 0x800000; + break; + + case 123: + w7[2] = w7[2] | 0x80000000; + break; + + case 124: + w7[3] = 0x80; + break; + + case 125: + w7[3] = w7[3] | 0x8000; + break; + + case 126: + w7[3] = w7[3] | 0x800000; + break; + + case 127: + w7[3] = w7[3] | 0x80000000; + break; + } +} + +// before: append_0x80_4 +static void append_0x80_1x16 (u32 w[16], const u32 offset) +{ + switch (offset) + { + case 0: + w[ 0] = 0x80; + break; + + case 1: + w[ 0] = w[ 0] | 0x8000; + break; + + case 2: + w[ 0] = w[ 0] | 0x800000; + break; + + case 3: + w[ 0] = w[ 0] | 0x80000000; + break; + + case 4: + w[ 1] = 0x80; + break; + + case 5: + w[ 1] = w[ 1] | 0x8000; + break; + + case 6: + w[ 1] = w[ 1] | 0x800000; + break; + + case 7: + w[ 1] = w[ 1] | 0x80000000; + break; + + case 8: + w[ 2] = 0x80; + break; + + case 9: + w[ 2] = w[ 2] | 0x8000; + break; + + case 10: + w[ 2] = w[ 2] | 0x800000; + break; + + case 11: + w[ 2] = w[ 2] | 0x80000000; + break; + + case 12: + w[ 3] = 0x80; + break; + + case 13: + w[ 3] = w[ 3] | 0x8000; + break; + + case 14: + w[ 3] = w[ 3] | 0x800000; + break; + + case 15: + w[ 3] = w[ 3] | 0x80000000; + break; + + case 16: + w[ 4] = 0x80; + break; + + case 17: + w[ 4] = w[ 4] | 0x8000; + break; + + case 18: + w[ 4] = w[ 4] | 0x800000; + break; + + case 19: + 
w[ 4] = w[ 4] | 0x80000000; + break; + + case 20: + w[ 5] = 0x80; + break; + + case 21: + w[ 5] = w[ 5] | 0x8000; + break; + + case 22: + w[ 5] = w[ 5] | 0x800000; + break; + + case 23: + w[ 5] = w[ 5] | 0x80000000; + break; + + case 24: + w[ 6] = 0x80; + break; + + case 25: + w[ 6] = w[ 6] | 0x8000; + break; + + case 26: + w[ 6] = w[ 6] | 0x800000; + break; + + case 27: + w[ 6] = w[ 6] | 0x80000000; + break; + + case 28: + w[ 7] = 0x80; + break; + + case 29: + w[ 7] = w[ 7] | 0x8000; + break; + + case 30: + w[ 7] = w[ 7] | 0x800000; + break; + + case 31: + w[ 7] = w[ 7] | 0x80000000; + break; + + case 32: + w[ 8] = 0x80; + break; + + case 33: + w[ 8] = w[ 8] | 0x8000; + break; + + case 34: + w[ 8] = w[ 8] | 0x800000; + break; + + case 35: + w[ 8] = w[ 8] | 0x80000000; + break; + + case 36: + w[ 9] = 0x80; + break; + + case 37: + w[ 9] = w[ 9] | 0x8000; + break; + + case 38: + w[ 9] = w[ 9] | 0x800000; + break; + + case 39: + w[ 9] = w[ 9] | 0x80000000; + break; + + case 40: + w[10] = 0x80; + break; + + case 41: + w[10] = w[10] | 0x8000; + break; + + case 42: + w[10] = w[10] | 0x800000; + break; + + case 43: + w[10] = w[10] | 0x80000000; + break; + + case 44: + w[11] = 0x80; + break; + + case 45: + w[11] = w[11] | 0x8000; + break; + + case 46: + w[11] = w[11] | 0x800000; + break; + + case 47: + w[11] = w[11] | 0x80000000; + break; + + case 48: + w[12] = 0x80; + break; + + case 49: + w[12] = w[12] | 0x8000; + break; + + case 50: + w[12] = w[12] | 0x800000; + break; + + case 51: + w[12] = w[12] | 0x80000000; + break; + + case 52: + w[13] = 0x80; + break; + + case 53: + w[13] = w[13] | 0x8000; + break; + + case 54: + w[13] = w[13] | 0x800000; + break; + + case 55: + w[13] = w[13] | 0x80000000; + break; + + case 56: + w[14] = 0x80; + break; + + case 57: + w[14] = w[14] | 0x8000; + break; + + case 58: + w[14] = w[14] | 0x800000; + break; + + case 59: + w[14] = w[14] | 0x80000000; + break; + + case 60: + w[15] = 0x80; + break; + + case 61: + w[15] = w[15] | 0x8000; + 
break; + + case 62: + w[15] = w[15] | 0x800000; + break; + + case 63: + w[15] = w[15] | 0x80000000; + break; + } +} + +// before: append_0x80_8 +static void append_0x80_1x32 (u32 w[32], const u32 offset) +{ + switch (offset) + { + case 0: + w[ 0] = 0x80; + break; + + case 1: + w[ 0] = w[ 0] | 0x8000; + break; + + case 2: + w[ 0] = w[ 0] | 0x800000; + break; + + case 3: + w[ 0] = w[ 0] | 0x80000000; + break; + + case 4: + w[ 1] = 0x80; + break; + + case 5: + w[ 1] = w[ 1] | 0x8000; + break; + + case 6: + w[ 1] = w[ 1] | 0x800000; + break; + + case 7: + w[ 1] = w[ 1] | 0x80000000; + break; + + case 8: + w[ 2] = 0x80; + break; + + case 9: + w[ 2] = w[ 2] | 0x8000; + break; + + case 10: + w[ 2] = w[ 2] | 0x800000; + break; + + case 11: + w[ 2] = w[ 2] | 0x80000000; + break; + + case 12: + w[ 3] = 0x80; + break; + + case 13: + w[ 3] = w[ 3] | 0x8000; + break; + + case 14: + w[ 3] = w[ 3] | 0x800000; + break; + + case 15: + w[ 3] = w[ 3] | 0x80000000; + break; + + case 16: + w[ 4] = 0x80; + break; + + case 17: + w[ 4] = w[ 4] | 0x8000; + break; + + case 18: + w[ 4] = w[ 4] | 0x800000; + break; + + case 19: + w[ 4] = w[ 4] | 0x80000000; + break; + + case 20: + w[ 5] = 0x80; + break; + + case 21: + w[ 5] = w[ 5] | 0x8000; + break; + + case 22: + w[ 5] = w[ 5] | 0x800000; + break; + + case 23: + w[ 5] = w[ 5] | 0x80000000; + break; + + case 24: + w[ 6] = 0x80; + break; + + case 25: + w[ 6] = w[ 6] | 0x8000; + break; + + case 26: + w[ 6] = w[ 6] | 0x800000; + break; + + case 27: + w[ 6] = w[ 6] | 0x80000000; + break; + + case 28: + w[ 7] = 0x80; + break; + + case 29: + w[ 7] = w[ 7] | 0x8000; + break; + + case 30: + w[ 7] = w[ 7] | 0x800000; + break; + + case 31: + w[ 7] = w[ 7] | 0x80000000; + break; + + case 32: + w[ 8] = 0x80; + break; + + case 33: + w[ 8] = w[ 8] | 0x8000; + break; + + case 34: + w[ 8] = w[ 8] | 0x800000; + break; + + case 35: + w[ 8] = w[ 8] | 0x80000000; + break; + + case 36: + w[ 9] = 0x80; + break; + + case 37: + w[ 9] = w[ 9] | 0x8000; + break; + + 
case 38: + w[ 9] = w[ 9] | 0x800000; + break; + + case 39: + w[ 9] = w[ 9] | 0x80000000; + break; + + case 40: + w[10] = 0x80; + break; + + case 41: + w[10] = w[10] | 0x8000; + break; + + case 42: + w[10] = w[10] | 0x800000; + break; + + case 43: + w[10] = w[10] | 0x80000000; + break; + + case 44: + w[11] = 0x80; + break; + + case 45: + w[11] = w[11] | 0x8000; + break; + + case 46: + w[11] = w[11] | 0x800000; + break; + + case 47: + w[11] = w[11] | 0x80000000; + break; + + case 48: + w[12] = 0x80; + break; + + case 49: + w[12] = w[12] | 0x8000; + break; + + case 50: + w[12] = w[12] | 0x800000; + break; + + case 51: + w[12] = w[12] | 0x80000000; + break; + + case 52: + w[13] = 0x80; + break; + + case 53: + w[13] = w[13] | 0x8000; + break; + + case 54: + w[13] = w[13] | 0x800000; + break; + + case 55: + w[13] = w[13] | 0x80000000; + break; + + case 56: + w[14] = 0x80; + break; + + case 57: + w[14] = w[14] | 0x8000; + break; + + case 58: + w[14] = w[14] | 0x800000; + break; + + case 59: + w[14] = w[14] | 0x80000000; + break; + + case 60: + w[15] = 0x80; + break; + + case 61: + w[15] = w[15] | 0x8000; + break; + + case 62: + w[15] = w[15] | 0x800000; + break; + + case 63: + w[15] = w[15] | 0x80000000; + break; + + case 64: + w[16] = 0x80; + break; + + case 65: + w[16] = w[16] | 0x8000; + break; + + case 66: + w[16] = w[16] | 0x800000; + break; + + case 67: + w[16] = w[16] | 0x80000000; + break; + + case 68: + w[17] = 0x80; + break; + + case 69: + w[17] = w[17] | 0x8000; + break; + + case 70: + w[17] = w[17] | 0x800000; + break; + + case 71: + w[17] = w[17] | 0x80000000; + break; + + case 72: + w[18] = 0x80; + break; + + case 73: + w[18] = w[18] | 0x8000; + break; + + case 74: + w[18] = w[18] | 0x800000; + break; + + case 75: + w[18] = w[18] | 0x80000000; + break; + + case 76: + w[19] = 0x80; + break; + + case 77: + w[19] = w[19] | 0x8000; + break; + + case 78: + w[19] = w[19] | 0x800000; + break; + + case 79: + w[19] = w[19] | 0x80000000; + break; + + case 80: + w[20] 
= 0x80; + break; + + case 81: + w[20] = w[20] | 0x8000; + break; + + case 82: + w[20] = w[20] | 0x800000; + break; + + case 83: + w[20] = w[20] | 0x80000000; + break; + + case 84: + w[21] = 0x80; + break; + + case 85: + w[21] = w[21] | 0x8000; + break; + + case 86: + w[21] = w[21] | 0x800000; + break; + + case 87: + w[21] = w[21] | 0x80000000; + break; + + case 88: + w[22] = 0x80; + break; + + case 89: + w[22] = w[22] | 0x8000; + break; + + case 90: + w[22] = w[22] | 0x800000; + break; + + case 91: + w[22] = w[22] | 0x80000000; + break; + + case 92: + w[23] = 0x80; + break; + + case 93: + w[23] = w[23] | 0x8000; + break; + + case 94: + w[23] = w[23] | 0x800000; + break; + + case 95: + w[23] = w[23] | 0x80000000; + break; + + case 96: + w[24] = 0x80; + break; + + case 97: + w[24] = w[24] | 0x8000; + break; + + case 98: + w[24] = w[24] | 0x800000; + break; + + case 99: + w[24] = w[24] | 0x80000000; + break; + + case 100: + w[25] = 0x80; + break; + + case 101: + w[25] = w[25] | 0x8000; + break; + + case 102: + w[25] = w[25] | 0x800000; + break; + + case 103: + w[25] = w[25] | 0x80000000; + break; + + case 104: + w[26] = 0x80; + break; + + case 105: + w[26] = w[26] | 0x8000; + break; + + case 106: + w[26] = w[26] | 0x800000; + break; + + case 107: + w[26] = w[26] | 0x80000000; + break; + + case 108: + w[27] = 0x80; + break; + + case 109: + w[27] = w[27] | 0x8000; + break; + + case 110: + w[27] = w[27] | 0x800000; + break; + + case 111: + w[27] = w[27] | 0x80000000; + break; + + case 112: + w[28] = 0x80; + break; + + case 113: + w[28] = w[28] | 0x8000; + break; + + case 114: + w[28] = w[28] | 0x800000; + break; + + case 115: + w[28] = w[28] | 0x80000000; + break; + + case 116: + w[29] = 0x80; + break; + + case 117: + w[29] = w[29] | 0x8000; + break; + + case 118: + w[29] = w[29] | 0x800000; + break; + + case 119: + w[29] = w[29] | 0x80000000; + break; + + case 120: + w[30] = 0x80; + break; + + case 121: + w[30] = w[30] | 0x8000; + break; + + case 122: + w[30] = w[30] | 
0x800000; + break; + + case 123: + w[30] = w[30] | 0x80000000; + break; + + case 124: + w[31] = 0x80; + break; + + case 125: + w[31] = w[31] | 0x8000; + break; + + case 126: + w[31] = w[31] | 0x800000; + break; + + case 127: + w[31] = w[31] | 0x80000000; + break; + } +} + +// before: device_memcat2L +static void memcat_c7_d1x2_sl1x2_sr1x2 (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2]) +{ + switch (offset) + { + case 1: + dst0[0] = src_l0[0] | src_r0[0] << 8; + dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; + break; + + case 2: + dst0[0] = src_l0[0] | src_r0[0] << 16; + dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; + break; + + case 3: + dst0[0] = src_l0[0] | src_r0[0] << 24; + dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; + break; + + case 4: + dst0[1] = src_r0[0]; + break; + + case 5: + dst0[1] = src_l0[1] | src_r0[0] << 8; + break; + + case 6: + dst0[1] = src_l0[1] | src_r0[0] << 16; + break; + + case 7: + dst0[1] = src_l0[1] | src_r0[0] << 24; + break; + } +} + +// before: device_memcat4L +static void memcat_c15_d1x4_sl1x4_sr1x4 (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4]) +{ + switch (offset) + { + case 1: + dst0[0] = src_l0[0] | src_r0[0] << 8; + dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; + break; + + case 2: + dst0[0] = src_l0[0] | src_r0[0] << 16; + dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; + break; + + case 3: + dst0[0] = src_l0[0] | src_r0[0] << 24; + dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; + break; + + case 4: + dst0[1] = src_r0[0]; + dst0[2] = src_r0[1]; + dst0[3] = src_r0[2]; + break; + + case 5: + dst0[1] = src_l0[1] | src_r0[0] << 8; + dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; + break; + + case 6: 
+ dst0[1] = src_l0[1] | src_r0[0] << 16; + dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; + break; + + case 7: + dst0[1] = src_l0[1] | src_r0[0] << 24; + dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; + break; + + case 8: + dst0[2] = src_r0[0]; + dst0[3] = src_r0[1]; + break; + + case 9: + dst0[2] = src_l0[2] | src_r0[0] << 8; + dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; + break; + + case 10: + dst0[2] = src_l0[2] | src_r0[0] << 16; + dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; + break; + + case 11: + dst0[2] = src_l0[2] | src_r0[0] << 24; + dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; + break; + + case 12: + dst0[3] = src_r0[0]; + break; + + case 13: + dst0[3] = src_l0[3] | src_r0[0] << 8; + break; + + case 14: + dst0[3] = src_l0[3] | src_r0[0] << 16; + break; + + case 15: + dst0[3] = src_l0[3] | src_r0[0] << 24; + break; + } +} + +// before: device_memcat8L +static void memcat_c31_d2x4_sl2x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4]) +{ + switch (offset) + { + case 1: + dst0[0] = src_l0[0] | src_r0[0] << 8; + dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[0] = src_r0[3] >> 24; + break; + + case 2: + dst0[0] = src_l0[0] | src_r0[0] << 16; + dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[0] = src_r0[3] >> 16; + break; + + case 3: + dst0[0] = src_l0[0] | src_r0[0] << 24; + dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[0] = src_r0[3] >> 8; + break; + + case 4: + dst0[1] = src_r0[0]; + dst0[2] = src_r0[1]; + dst0[3] = src_r0[2]; + dst1[0] = src_r0[3]; + break; + + case 5: + dst0[1] = src_l0[1] | src_r0[0] << 8; + dst0[2] = src_r0[0] >> 24 | 
src_r0[1] << 8; + dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[1] = src_r0[3] >> 24; + break; + + case 6: + dst0[1] = src_l0[1] | src_r0[0] << 16; + dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[1] = src_r0[3] >> 16; + break; + + case 7: + dst0[1] = src_l0[1] | src_r0[0] << 24; + dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[1] = src_r0[3] >> 8; + break; + + case 8: + dst0[2] = src_r0[0]; + dst0[3] = src_r0[1]; + dst1[0] = src_r0[2]; + dst1[1] = src_r0[3]; + break; + + case 9: + dst0[2] = src_l0[2] | src_r0[0] << 8; + dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[2] = src_r0[3] >> 24; + break; + + case 10: + dst0[2] = src_l0[2] | src_r0[0] << 16; + dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[2] = src_r0[3] >> 16; + break; + + case 11: + dst0[2] = src_l0[2] | src_r0[0] << 24; + dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[2] = src_r0[3] >> 8; + break; + + case 12: + dst0[3] = src_r0[0]; + dst1[0] = src_r0[1]; + dst1[1] = src_r0[2]; + dst1[2] = src_r0[3]; + break; + + case 13: + dst0[3] = src_l0[3] | src_r0[0] << 8; + dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[3] = src_r0[3] >> 24; + break; + + case 14: + dst0[3] = src_l0[3] | src_r0[0] << 16; + dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[3] = src_r0[3] >> 16; + break; + + case 15: + 
dst0[3] = src_l0[3] | src_r0[0] << 24; + dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[3] = src_r0[3] >> 8; + break; + + case 16: + dst1[0] = src_r0[0]; + dst1[1] = src_r0[1]; + dst1[2] = src_r0[2]; + dst1[3] = src_r0[3]; + break; + + case 17: + dst1[0] = src_l1[0] | src_r0[0] << 8; + dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; + break; + + case 18: + dst1[0] = src_l1[0] | src_r0[0] << 16; + dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; + break; + + case 19: + dst1[0] = src_l1[0] | src_r0[0] << 24; + dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; + break; + + case 20: + dst1[1] = src_r0[0]; + dst1[2] = src_r0[1]; + dst1[3] = src_r0[2]; + break; + + case 21: + dst1[1] = src_l1[1] | src_r0[0] << 8; + dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; + break; + + case 22: + dst1[1] = src_l1[1] | src_r0[0] << 16; + dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; + break; + + case 23: + dst1[1] = src_l1[1] | src_r0[0] << 24; + dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; + break; + + case 24: + dst1[2] = src_r0[0]; + dst1[3] = src_r0[1]; + break; + + case 25: + dst1[2] = src_l1[2] | src_r0[0] << 8; + dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; + break; + + case 26: + dst1[2] = src_l1[2] | src_r0[0] << 16; + dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; + break; + + case 27: + dst1[2] = src_l1[2] | src_r0[0] << 24; + dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; + break; + + case 28: + dst1[3] = src_r0[0]; + break; + + case 29: + dst1[3] = src_l1[3] | src_r0[0] << 8; + break; + + 
case 30: + dst1[3] = src_l1[3] | src_r0[0] << 16; + break; + + case 31: + dst1[3] = src_l1[3] | src_r0[0] << 24; + break; + } +} + +// before: device_memcat12L +static void memcat_c47_d3x4_sl3x4_sr1x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4]) +{ + switch (offset) + { + case 1: + dst0[0] = src_l0[0] | src_r0[0] << 8; + dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[0] = src_r0[3] >> 24; + break; + + case 2: + dst0[0] = src_l0[0] | src_r0[0] << 16; + dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[0] = src_r0[3] >> 16; + break; + + case 3: + dst0[0] = src_l0[0] | src_r0[0] << 24; + dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[0] = src_r0[3] >> 8; + break; + + case 4: + dst0[1] = src_r0[0]; + dst0[2] = src_r0[1]; + dst0[3] = src_r0[2]; + dst1[0] = src_r0[3]; + break; + + case 5: + dst0[1] = src_l0[1] | src_r0[0] << 8; + dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[1] = src_r0[3] >> 24; + break; + + case 6: + dst0[1] = src_l0[1] | src_r0[0] << 16; + dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[1] = src_r0[3] >> 16; + break; + + case 7: + dst0[1] = src_l0[1] | src_r0[0] << 24; + dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[1] = src_r0[3] >> 8; + break; + + case 8: + dst0[2] = src_r0[0]; + dst0[3] = src_r0[1]; + dst1[0] = src_r0[2]; + dst1[1] = src_r0[3]; + break; + + case 9: + dst0[2] = src_l0[2] | src_r0[0] << 8; + 
dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[2] = src_r0[3] >> 24; + break; + + case 10: + dst0[2] = src_l0[2] | src_r0[0] << 16; + dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[2] = src_r0[3] >> 16; + break; + + case 11: + dst0[2] = src_l0[2] | src_r0[0] << 24; + dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[2] = src_r0[3] >> 8; + break; + + case 12: + dst0[3] = src_r0[0]; + dst1[0] = src_r0[1]; + dst1[1] = src_r0[2]; + dst1[2] = src_r0[3]; + break; + + case 13: + dst0[3] = src_l0[3] | src_r0[0] << 8; + dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[3] = src_r0[3] >> 24; + break; + + case 14: + dst0[3] = src_l0[3] | src_r0[0] << 16; + dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[3] = src_r0[3] >> 16; + break; + + case 15: + dst0[3] = src_l0[3] | src_r0[0] << 24; + dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[3] = src_r0[3] >> 8; + break; + + case 16: + dst1[0] = src_r0[0]; + dst1[1] = src_r0[1]; + dst1[2] = src_r0[2]; + dst1[3] = src_r0[3]; + break; + + case 17: + dst1[0] = src_l1[0] | src_r0[0] << 8; + dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[0] = src_r0[3] >> 24; + break; + + case 18: + dst1[0] = src_l1[0] | src_r0[0] << 16; + dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[0] = 
src_r0[3] >> 16; + break; + + case 19: + dst1[0] = src_l1[0] | src_r0[0] << 24; + dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[0] = src_r0[3] >> 8; + break; + + case 20: + dst1[1] = src_r0[0]; + dst1[2] = src_r0[1]; + dst1[3] = src_r0[2]; + dst2[0] = src_r0[3]; + break; + + case 21: + dst1[1] = src_l1[1] | src_r0[0] << 8; + dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[1] = src_r0[3] >> 24; + break; + + case 22: + dst1[1] = src_l1[1] | src_r0[0] << 16; + dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[1] = src_r0[3] >> 16; + break; + + case 23: + dst1[1] = src_l1[1] | src_r0[0] << 24; + dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[1] = src_r0[3] >> 8; + break; + + case 24: + dst1[2] = src_r0[0]; + dst1[3] = src_r0[1]; + dst2[0] = src_r0[2]; + dst2[1] = src_r0[3]; + break; + + case 25: + dst1[2] = src_l1[2] | src_r0[0] << 8; + dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[2] = src_r0[3] >> 24; + break; + + case 26: + dst1[2] = src_l1[2] | src_r0[0] << 16; + dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[2] = src_r0[3] >> 16; + break; + + case 27: + dst1[2] = src_l1[2] | src_r0[0] << 24; + dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[2] = src_r0[3] >> 8; + break; + + case 28: + dst1[3] = src_r0[0]; + dst2[0] = src_r0[1]; + dst2[1] = src_r0[2]; + dst2[2] = src_r0[3]; + break; + + case 29: + 
dst1[3] = src_l1[3] | src_r0[0] << 8; + dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[3] = src_r0[3] >> 24; + break; + + case 30: + dst1[3] = src_l1[3] | src_r0[0] << 16; + dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[3] = src_r0[3] >> 16; + break; + + case 31: + dst1[3] = src_l1[3] | src_r0[0] << 24; + dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[3] = src_r0[3] >> 8; + break; + + case 32: + dst2[0] = src_r0[0]; + dst2[1] = src_r0[1]; + dst2[2] = src_r0[2]; + dst2[3] = src_r0[3]; + break; + + case 33: + dst2[0] = src_l2[0] | src_r0[0] << 8; + dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; + break; + + case 34: + dst2[0] = src_l2[0] | src_r0[0] << 16; + dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; + break; + + case 35: + dst2[0] = src_l2[0] | src_r0[0] << 24; + dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; + break; + + case 36: + dst2[1] = src_r0[0]; + dst2[2] = src_r0[1]; + dst2[3] = src_r0[2]; + break; + + case 37: + dst2[1] = src_l2[1] | src_r0[0] << 8; + dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; + break; + + case 38: + dst2[1] = src_l2[1] | src_r0[0] << 16; + dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; + break; + + case 39: + dst2[1] = src_l2[1] | src_r0[0] << 24; + dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; + break; + + case 40: + dst2[2] = src_r0[0]; + dst2[3] = 
src_r0[1]; + break; + + case 41: + dst2[2] = src_l2[2] | src_r0[0] << 8; + dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; + break; + + case 42: + dst2[2] = src_l2[2] | src_r0[0] << 16; + dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; + break; + + case 43: + dst2[2] = src_l2[2] | src_r0[0] << 24; + dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; + break; + + case 44: + dst2[3] = src_r0[0]; + break; + + case 45: + dst2[3] = src_l2[3] | src_r0[0] << 8; + break; + + case 46: + dst2[3] = src_l2[3] | src_r0[0] << 16; + break; + + case 47: + dst2[3] = src_l2[3] | src_r0[0] << 24; + break; + } +} + +// before: device_memcat12L +static void memcat_c47_d3x4_sl3x4_sr2x4 (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4]) +{ + switch (offset) + { + case 0: + dst0[0] = src_r0[0]; + dst0[1] = src_r0[1]; + dst0[2] = src_r0[2]; + dst0[3] = src_r0[3]; + dst1[0] = src_r1[0]; + dst1[1] = src_r1[1]; + dst1[2] = src_r1[2]; + dst1[3] = src_r1[3]; + break; + + case 1: + dst0[0] = src_l0[0] | src_r0[0] << 8; + dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; + dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; + dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8; + dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; + dst2[0] = src_r1[3] >> 24; + break; + + case 2: + dst0[0] = src_l0[0] | src_r0[0] << 16; + dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; + dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; + dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; + dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; + dst2[0] = src_r1[3] >> 16; + break; + + case 3: + dst0[0] = src_l0[0] | src_r0[0] << 24; + dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; + 
dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; + dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; + dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; + dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; + dst2[0] = src_r1[3] >> 8; + break; + + case 4: + dst0[1] = src_r0[0]; + dst0[2] = src_r0[1]; + dst0[3] = src_r0[2]; + dst1[0] = src_r0[3]; + dst1[1] = src_r1[0]; + dst1[2] = src_r1[1]; + dst1[3] = src_r1[2]; + dst2[0] = src_r1[3]; + break; + + case 5: + dst0[1] = src_l0[1] | src_r0[0] << 8; + dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; + dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; + dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8; + dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8; + dst2[1] = src_r1[3] >> 24; + break; + + case 6: + dst0[1] = src_l0[1] | src_r0[0] << 16; + dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; + dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; + dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; + dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16; + dst2[1] = src_r1[3] >> 16; + break; + + case 7: + dst0[1] = src_l0[1] | src_r0[0] << 24; + dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; + dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; + dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; + dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24; + dst2[1] = src_r1[3] >> 8; + break; + + case 8: + dst0[2] = src_r0[0]; + dst0[3] = src_r0[1]; + dst1[0] = src_r0[2]; + dst1[1] = src_r0[3]; + dst1[2] = src_r1[0]; + dst1[3] = src_r1[1]; + dst2[0] = src_r1[2]; + dst2[1] = src_r1[3]; + break; + + case 9: + dst0[2] = src_l0[2] | src_r0[0] << 8; + dst0[3] = src_r0[0] >> 24 
| src_r0[1] << 8; + dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; + dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; + dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8; + dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8; + dst2[2] = src_r1[3] >> 24; + break; + + case 10: + dst0[2] = src_l0[2] | src_r0[0] << 16; + dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; + dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; + dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16; + dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16; + dst2[2] = src_r1[3] >> 16; + break; + + case 11: + dst0[2] = src_l0[2] | src_r0[0] << 24; + dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24; + dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; + dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24; + dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24; + dst2[2] = src_r1[3] >> 8; + break; + + case 12: + dst0[3] = src_r0[0]; + dst1[0] = src_r0[1]; + dst1[1] = src_r0[2]; + dst1[2] = src_r0[3]; + dst1[3] = src_r1[0]; + dst2[0] = src_r1[1]; + dst2[1] = src_r1[2]; + dst2[2] = src_r1[3]; + break; + + case 13: + dst0[3] = src_l0[3] | src_r0[0] << 8; + dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; + dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8; + dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8; + dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8; + dst2[3] = src_r1[3] >> 24; + break; + + case 14: + dst0[3] = src_l0[3] | src_r0[0] << 16; + dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; + dst1[3] = 
src_r0[3] >> 16 | src_r1[0] << 16; + dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16; + dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16; + dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16; + dst2[3] = src_r1[3] >> 16; + break; + + case 15: + dst0[3] = src_l0[3] | src_r0[0] << 24; + dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; + dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24; + dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24; + dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24; + dst2[3] = src_r1[3] >> 8; + break; + + case 16: + dst1[0] = src_r0[0]; + dst1[1] = src_r0[1]; + dst1[2] = src_r0[2]; + dst1[3] = src_r0[3]; + dst2[0] = src_r1[0]; + dst2[1] = src_r1[1]; + dst2[2] = src_r1[2]; + dst2[3] = src_r1[3]; + break; + + case 17: + dst1[0] = src_l1[0] | src_r0[0] << 8; + dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8; + dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8; + dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8; + dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8; + break; + + case 18: + dst1[0] = src_l1[0] | src_r0[0] << 16; + dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16; + dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16; + dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16; + dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16; + break; + + case 19: + dst1[0] = src_l1[0] | src_r0[0] << 24; + dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24; + dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24; + dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24; + dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24; + break; + + 
case 20: + dst1[1] = src_r1[0]; + dst1[2] = src_r0[1]; + dst1[3] = src_r0[2]; + dst2[0] = src_r0[3]; + dst2[1] = src_r1[0]; + dst2[2] = src_r1[1]; + dst2[3] = src_r1[2]; + break; + + case 21: + dst1[1] = src_l1[1] | src_r0[0] << 8; + dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8; + dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8; + dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8; + break; + + case 22: + dst1[1] = src_l1[1] | src_r0[0] << 16; + dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16; + dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16; + dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16; + break; + + case 23: + dst1[1] = src_l1[1] | src_r0[0] << 24; + dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24; + dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24; + dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24; + break; + + case 24: + dst1[2] = src_r1[0]; + dst1[3] = src_r0[1]; + dst2[0] = src_r0[2]; + dst2[1] = src_r0[3]; + dst2[2] = src_r1[0]; + dst2[3] = src_r1[1]; + break; + + case 25: + dst1[2] = src_l1[2] | src_r0[0] << 8; + dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8; + dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8; + break; + + case 26: + dst1[2] = src_l1[2] | src_r0[0] << 16; + dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16; + dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16; + break; + + case 27: + dst1[2] = src_l1[2] | src_r0[0] 
<< 24; + dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24; + dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24; + break; + + case 28: + dst1[3] = src_r1[0]; + dst2[0] = src_r0[1]; + dst2[1] = src_r0[2]; + dst2[2] = src_r0[3]; + dst2[3] = src_r1[0]; + break; + + case 29: + dst1[3] = src_l1[3] | src_r0[0] << 8; + dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; + dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8; + break; + + case 30: + dst1[3] = src_l1[3] | src_r0[0] << 16; + dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; + dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16; + break; + + case 31: + dst1[3] = src_l1[3] | src_r0[0] << 24; + dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; + dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24; + break; + + case 32: + dst2[0] = src_r0[0]; + dst2[1] = src_r0[1]; + dst2[2] = src_r0[2]; + dst2[3] = src_r0[3]; + break; + + case 33: + dst2[0] = src_l2[0] | src_r0[0] << 8; + dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; + dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; + dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; + break; + + case 34: + dst2[0] = src_l2[0] | src_r0[0] << 16; + dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; + dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; + break; + + case 35: + dst2[0] = src_l2[0] | src_r0[0] << 24; + dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; + dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; + break; + + case 36: + dst2[1] = src_r0[0]; + dst2[2] = src_r0[1]; + dst2[3] = src_r0[2]; + break; + + case 37: + dst2[1] = src_l2[1] | src_r0[0] << 8; + dst2[2] 
= src_r0[0] >> 24 | src_r0[1] << 8; + dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; + break; + + case 38: + dst2[1] = src_l2[1] | src_r0[0] << 16; + dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; + dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; + break; + + case 39: + dst2[1] = src_l2[1] | src_r0[0] << 24; + dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; + dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; + break; + + case 40: + dst2[2] = src_r0[0]; + dst2[3] = src_r0[1]; + break; + + case 41: + dst2[2] = src_l2[2] | src_r0[0] << 8; + dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; + break; + + case 42: + dst2[2] = src_l2[2] | src_r0[0] << 16; + dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; + break; + + case 43: + dst2[2] = src_l2[2] | src_r0[0] << 24; + dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; + break; + + case 44: + dst2[3] = src_r0[0]; + break; + + case 45: + dst2[3] = src_l2[3] | src_r0[0] << 8; + break; + + case 46: + dst2[3] = src_l2[3] | src_r0[0] << 16; + break; + + case 47: + dst2[3] = src_l2[3] | src_r0[0] << 24; + break; + } +} + +// before: memcat16_9 +static void memcat_c15_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = append0[0]; + w0[1] = append0[1]; + w0[2] = append0[2]; + w0[3] = append0[3]; + w1[0] = append1[0]; + w1[1] = append1[1]; + w1[2] = append1[2]; + w1[3] = append1[3]; + w2[0] = append2[0]; + break; + + case 1: + w0[0] = w0[0] | append0[0] << 8; + w0[1] = append0[0] >> 24 | append0[1] << 8; + w0[2] = append0[1] >> 24 | append0[2] << 8; + w0[3] = append0[2] >> 24 | append0[3] << 8; + w1[0] = append0[3] >> 24 | append1[0] << 8; + w1[1] = append1[0] >> 24 | append1[1] << 8; + w1[2] = append1[1] >> 24 | append1[2] << 8; + w1[3] = append1[2] >> 24 | append1[3] << 8; + w2[0] = append1[3] >> 24 | append2[0] << 8; + w2[1] = append2[0] >> 24; + break; + + case 2: + w0[0] = w0[0] | append0[0] << 16; + w0[1] = append0[0] >> 16 | 
append0[1] << 16; + w0[2] = append0[1] >> 16 | append0[2] << 16; + w0[3] = append0[2] >> 16 | append0[3] << 16; + w1[0] = append0[3] >> 16 | append1[0] << 16; + w1[1] = append1[0] >> 16 | append1[1] << 16; + w1[2] = append1[1] >> 16 | append1[2] << 16; + w1[3] = append1[2] >> 16 | append1[3] << 16; + w2[0] = append1[3] >> 16 | append2[0] << 16; + w2[1] = append2[0] >> 16; + break; + + case 3: + w0[0] = w0[0] | append0[0] << 24; + w0[1] = append0[0] >> 8 | append0[1] << 24; + w0[2] = append0[1] >> 8 | append0[2] << 24; + w0[3] = append0[2] >> 8 | append0[3] << 24; + w1[0] = append0[3] >> 8 | append1[0] << 24; + w1[1] = append1[0] >> 8 | append1[1] << 24; + w1[2] = append1[1] >> 8 | append1[2] << 24; + w1[3] = append1[2] >> 8 | append1[3] << 24; + w2[0] = append1[3] >> 8 | append2[0] << 24; + w2[1] = append2[0] >> 8; + break; + + case 4: + w0[1] = append0[0]; + w0[2] = append0[1]; + w0[3] = append0[2]; + w1[0] = append0[3]; + w1[1] = append1[0]; + w1[2] = append1[1]; + w1[3] = append1[2]; + w2[0] = append1[3]; + w2[1] = append2[0]; + break; + + case 5: + w0[1] = w0[1] | append0[0] << 8; + w0[2] = append0[0] >> 24 | append0[1] << 8; + w0[3] = append0[1] >> 24 | append0[2] << 8; + w1[0] = append0[2] >> 24 | append0[3] << 8; + w1[1] = append0[3] >> 24 | append1[0] << 8; + w1[2] = append1[0] >> 24 | append1[1] << 8; + w1[3] = append1[1] >> 24 | append1[2] << 8; + w2[0] = append1[2] >> 24 | append1[3] << 8; + w2[1] = append1[3] >> 24 | append2[0] << 8; + w2[2] = append2[0] >> 24; + break; + + case 6: + w0[1] = w0[1] | append0[0] << 16; + w0[2] = append0[0] >> 16 | append0[1] << 16; + w0[3] = append0[1] >> 16 | append0[2] << 16; + w1[0] = append0[2] >> 16 | append0[3] << 16; + w1[1] = append0[3] >> 16 | append1[0] << 16; + w1[2] = append1[0] >> 16 | append1[1] << 16; + w1[3] = append1[1] >> 16 | append1[2] << 16; + w2[0] = append1[2] >> 16 | append1[3] << 16; + w2[1] = append1[3] >> 16 | append2[0] << 16; + w2[2] = append2[0] >> 16; + break; + + case 7: + w0[1] = w0[1] | 
append0[0] << 24; + w0[2] = append0[0] >> 8 | append0[1] << 24; + w0[3] = append0[1] >> 8 | append0[2] << 24; + w1[0] = append0[2] >> 8 | append0[3] << 24; + w1[1] = append0[3] >> 8 | append1[0] << 24; + w1[2] = append1[0] >> 8 | append1[1] << 24; + w1[3] = append1[1] >> 8 | append1[2] << 24; + w2[0] = append1[2] >> 8 | append1[3] << 24; + w2[1] = append1[3] >> 8 | append2[0] << 24; + w2[2] = append2[0] >> 8; + break; + + case 8: + w0[2] = append0[0]; + w0[3] = append0[1]; + w1[0] = append0[2]; + w1[1] = append0[3]; + w1[2] = append1[0]; + w1[3] = append1[1]; + w2[0] = append1[2]; + w2[1] = append1[3]; + w2[2] = append2[0]; + break; + + case 9: + w0[2] = w0[2] | append0[0] << 8; + w0[3] = append0[0] >> 24 | append0[1] << 8; + w1[0] = append0[1] >> 24 | append0[2] << 8; + w1[1] = append0[2] >> 24 | append0[3] << 8; + w1[2] = append0[3] >> 24 | append1[0] << 8; + w1[3] = append1[0] >> 24 | append1[1] << 8; + w2[0] = append1[1] >> 24 | append1[2] << 8; + w2[1] = append1[2] >> 24 | append1[3] << 8; + w2[2] = append1[3] >> 24 | append2[0] << 8; + w2[3] = append2[0] >> 24; + break; + + case 10: + w0[2] = w0[2] | append0[0] << 16; + w0[3] = append0[0] >> 16 | append0[1] << 16; + w1[0] = append0[1] >> 16 | append0[2] << 16; + w1[1] = append0[2] >> 16 | append0[3] << 16; + w1[2] = append0[3] >> 16 | append1[0] << 16; + w1[3] = append1[0] >> 16 | append1[1] << 16; + w2[0] = append1[1] >> 16 | append1[2] << 16; + w2[1] = append1[2] >> 16 | append1[3] << 16; + w2[2] = append1[3] >> 16 | append2[0] << 16; + w2[3] = append2[0] >> 16; + break; + + case 11: + w0[2] = w0[2] | append0[0] << 24; + w0[3] = append0[0] >> 8 | append0[1] << 24; + w1[0] = append0[1] >> 8 | append0[2] << 24; + w1[1] = append0[2] >> 8 | append0[3] << 24; + w1[2] = append0[3] >> 8 | append1[0] << 24; + w1[3] = append1[0] >> 8 | append1[1] << 24; + w2[0] = append1[1] >> 8 | append1[2] << 24; + w2[1] = append1[2] >> 8 | append1[3] << 24; + w2[2] = append1[3] >> 8 | append2[0] << 24; + w2[3] = append2[0] >> 8; 
+ break; + + case 12: + w0[3] = append0[0]; + w1[0] = append0[1]; + w1[1] = append0[2]; + w1[2] = append0[3]; + w1[3] = append1[0]; + w2[0] = append1[1]; + w2[1] = append1[2]; + w2[2] = append1[3]; + w2[3] = append2[0]; + break; + + case 13: + w0[3] = w0[3] | append0[0] << 8; + w1[0] = append0[0] >> 24 | append0[1] << 8; + w1[1] = append0[1] >> 24 | append0[2] << 8; + w1[2] = append0[2] >> 24 | append0[3] << 8; + w1[3] = append0[3] >> 24 | append1[0] << 8; + w2[0] = append1[0] >> 24 | append1[1] << 8; + w2[1] = append1[1] >> 24 | append1[2] << 8; + w2[2] = append1[2] >> 24 | append1[3] << 8; + w2[3] = append1[3] >> 24 | append2[0] << 8; + w3[0] = append2[0] >> 24; + break; + + case 14: + w0[3] = w0[3] | append0[0] << 16; + w1[0] = append0[0] >> 16 | append0[1] << 16; + w1[1] = append0[1] >> 16 | append0[2] << 16; + w1[2] = append0[2] >> 16 | append0[3] << 16; + w1[3] = append0[3] >> 16 | append1[0] << 16; + w2[0] = append1[0] >> 16 | append1[1] << 16; + w2[1] = append1[1] >> 16 | append1[2] << 16; + w2[2] = append1[2] >> 16 | append1[3] << 16; + w2[3] = append1[3] >> 16 | append2[0] << 16; + w3[0] = append2[0] >> 16; + break; + + case 15: + w0[3] = w0[3] | append0[0] << 24; + w1[0] = append0[0] >> 8 | append0[1] << 24; + w1[1] = append0[1] >> 8 | append0[2] << 24; + w1[2] = append0[2] >> 8 | append0[3] << 24; + w1[3] = append0[3] >> 8 | append1[0] << 24; + w2[0] = append1[0] >> 8 | append1[1] << 24; + w2[1] = append1[1] >> 8 | append1[2] << 24; + w2[2] = append1[2] >> 8 | append1[3] << 24; + w2[3] = append1[3] >> 8 | append2[0] << 24; + w3[0] = append2[0] >> 8; + break; + } +} + +// before: memcat32_8 +static void memcat_c32_w4x4_a2x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = append0[0]; + w0[1] = append0[1]; + w0[2] = append0[2]; + w0[3] = append0[3]; + w1[0] = append1[0]; + w1[1] = append1[1]; + w1[2] = append1[2]; + w1[3] = append1[3]; + break; + + case 
1: + w0[0] = w0[0] | append0[0] << 8; + w0[1] = append0[0] >> 24 | append0[1] << 8; + w0[2] = append0[1] >> 24 | append0[2] << 8; + w0[3] = append0[2] >> 24 | append0[3] << 8; + w1[0] = append0[3] >> 24 | append1[0] << 8; + w1[1] = append1[0] >> 24 | append1[1] << 8; + w1[2] = append1[1] >> 24 | append1[2] << 8; + w1[3] = append1[2] >> 24 | append1[3] << 8; + w2[0] = append1[3] >> 24; + break; + + case 2: + w0[0] = w0[0] | append0[0] << 16; + w0[1] = append0[0] >> 16 | append0[1] << 16; + w0[2] = append0[1] >> 16 | append0[2] << 16; + w0[3] = append0[2] >> 16 | append0[3] << 16; + w1[0] = append0[3] >> 16 | append1[0] << 16; + w1[1] = append1[0] >> 16 | append1[1] << 16; + w1[2] = append1[1] >> 16 | append1[2] << 16; + w1[3] = append1[2] >> 16 | append1[3] << 16; + w2[0] = append1[3] >> 16; + break; + + case 3: + w0[0] = w0[0] | append0[0] << 24; + w0[1] = append0[0] >> 8 | append0[1] << 24; + w0[2] = append0[1] >> 8 | append0[2] << 24; + w0[3] = append0[2] >> 8 | append0[3] << 24; + w1[0] = append0[3] >> 8 | append1[0] << 24; + w1[1] = append1[0] >> 8 | append1[1] << 24; + w1[2] = append1[1] >> 8 | append1[2] << 24; + w1[3] = append1[2] >> 8 | append1[3] << 24; + w2[0] = append1[3] >> 8; + break; + + case 4: + w0[1] = append0[0]; + w0[2] = append0[1]; + w0[3] = append0[2]; + w1[0] = append0[3]; + w1[1] = append1[0]; + w1[2] = append1[1]; + w1[3] = append1[2]; + w2[0] = append1[3]; + break; + + case 5: + w0[1] = w0[1] | append0[0] << 8; + w0[2] = append0[0] >> 24 | append0[1] << 8; + w0[3] = append0[1] >> 24 | append0[2] << 8; + w1[0] = append0[2] >> 24 | append0[3] << 8; + w1[1] = append0[3] >> 24 | append1[0] << 8; + w1[2] = append1[0] >> 24 | append1[1] << 8; + w1[3] = append1[1] >> 24 | append1[2] << 8; + w2[0] = append1[2] >> 24 | append1[3] << 8; + w2[1] = append1[3] >> 24; + break; + + case 6: + w0[1] = w0[1] | append0[0] << 16; + w0[2] = append0[0] >> 16 | append0[1] << 16; + w0[3] = append0[1] >> 16 | append0[2] << 16; + w1[0] = append0[2] >> 16 | 
append0[3] << 16; + w1[1] = append0[3] >> 16 | append1[0] << 16; + w1[2] = append1[0] >> 16 | append1[1] << 16; + w1[3] = append1[1] >> 16 | append1[2] << 16; + w2[0] = append1[2] >> 16 | append1[3] << 16; + w2[1] = append1[3] >> 16; + break; + + case 7: + w0[1] = w0[1] | append0[0] << 24; + w0[2] = append0[0] >> 8 | append0[1] << 24; + w0[3] = append0[1] >> 8 | append0[2] << 24; + w1[0] = append0[2] >> 8 | append0[3] << 24; + w1[1] = append0[3] >> 8 | append1[0] << 24; + w1[2] = append1[0] >> 8 | append1[1] << 24; + w1[3] = append1[1] >> 8 | append1[2] << 24; + w2[0] = append1[2] >> 8 | append1[3] << 24; + w2[1] = append1[3] >> 8; + break; + + case 8: + w0[2] = append0[0]; + w0[3] = append0[1]; + w1[0] = append0[2]; + w1[1] = append0[3]; + w1[2] = append1[0]; + w1[3] = append1[1]; + w2[0] = append1[2]; + w2[1] = append1[3]; + break; + + case 9: + w0[2] = w0[2] | append0[0] << 8; + w0[3] = append0[0] >> 24 | append0[1] << 8; + w1[0] = append0[1] >> 24 | append0[2] << 8; + w1[1] = append0[2] >> 24 | append0[3] << 8; + w1[2] = append0[3] >> 24 | append1[0] << 8; + w1[3] = append1[0] >> 24 | append1[1] << 8; + w2[0] = append1[1] >> 24 | append1[2] << 8; + w2[1] = append1[2] >> 24 | append1[3] << 8; + w2[2] = append1[3] >> 24; + break; + + case 10: + w0[2] = w0[2] | append0[0] << 16; + w0[3] = append0[0] >> 16 | append0[1] << 16; + w1[0] = append0[1] >> 16 | append0[2] << 16; + w1[1] = append0[2] >> 16 | append0[3] << 16; + w1[2] = append0[3] >> 16 | append1[0] << 16; + w1[3] = append1[0] >> 16 | append1[1] << 16; + w2[0] = append1[1] >> 16 | append1[2] << 16; + w2[1] = append1[2] >> 16 | append1[3] << 16; + w2[2] = append1[3] >> 16; + break; + + case 11: + w0[2] = w0[2] | append0[0] << 24; + w0[3] = append0[0] >> 8 | append0[1] << 24; + w1[0] = append0[1] >> 8 | append0[2] << 24; + w1[1] = append0[2] >> 8 | append0[3] << 24; + w1[2] = append0[3] >> 8 | append1[0] << 24; + w1[3] = append1[0] >> 8 | append1[1] << 24; + w2[0] = append1[1] >> 8 | append1[2] << 24; + w2[1] 
= append1[2] >> 8 | append1[3] << 24; + w2[2] = append1[3] >> 8; + break; + + case 12: + w0[3] = append0[0]; + w1[0] = append0[1]; + w1[1] = append0[2]; + w1[2] = append0[3]; + w1[3] = append1[0]; + w2[0] = append1[1]; + w2[1] = append1[2]; + w2[2] = append1[3]; + break; + + case 13: + w0[3] = w0[3] | append0[0] << 8; + w1[0] = append0[0] >> 24 | append0[1] << 8; + w1[1] = append0[1] >> 24 | append0[2] << 8; + w1[2] = append0[2] >> 24 | append0[3] << 8; + w1[3] = append0[3] >> 24 | append1[0] << 8; + w2[0] = append1[0] >> 24 | append1[1] << 8; + w2[1] = append1[1] >> 24 | append1[2] << 8; + w2[2] = append1[2] >> 24 | append1[3] << 8; + w2[3] = append1[3] >> 24; + break; + + case 14: + w0[3] = w0[3] | append0[0] << 16; + w1[0] = append0[0] >> 16 | append0[1] << 16; + w1[1] = append0[1] >> 16 | append0[2] << 16; + w1[2] = append0[2] >> 16 | append0[3] << 16; + w1[3] = append0[3] >> 16 | append1[0] << 16; + w2[0] = append1[0] >> 16 | append1[1] << 16; + w2[1] = append1[1] >> 16 | append1[2] << 16; + w2[2] = append1[2] >> 16 | append1[3] << 16; + w2[3] = append1[3] >> 16; + break; + + case 15: + w0[3] = w0[3] | append0[0] << 24; + w1[0] = append0[0] >> 8 | append0[1] << 24; + w1[1] = append0[1] >> 8 | append0[2] << 24; + w1[2] = append0[2] >> 8 | append0[3] << 24; + w1[3] = append0[3] >> 8 | append1[0] << 24; + w2[0] = append1[0] >> 8 | append1[1] << 24; + w2[1] = append1[1] >> 8 | append1[2] << 24; + w2[2] = append1[2] >> 8 | append1[3] << 24; + w2[3] = append1[3] >> 8; + break; + + case 16: + w1[0] = append0[0]; + w1[1] = append0[1]; + w1[2] = append0[2]; + w1[3] = append0[3]; + w2[0] = append1[0]; + w2[1] = append1[1]; + w2[2] = append1[2]; + w2[3] = append1[3]; + break; + + case 17: + w1[0] = w1[0] | append0[0] << 8; + w1[1] = append0[0] >> 24 | append0[1] << 8; + w1[2] = append0[1] >> 24 | append0[2] << 8; + w1[3] = append0[2] >> 24 | append0[3] << 8; + w2[0] = append0[3] >> 24 | append1[0] << 8; + w2[1] = append1[0] >> 24 | append1[1] << 8; + w2[2] = append1[1] 
>> 24 | append1[2] << 8; + w2[3] = append1[2] >> 24 | append1[3] << 8; + w3[0] = append1[3] >> 24; + break; + + case 18: + w1[0] = w1[0] | append0[0] << 16; + w1[1] = append0[0] >> 16 | append0[1] << 16; + w1[2] = append0[1] >> 16 | append0[2] << 16; + w1[3] = append0[2] >> 16 | append0[3] << 16; + w2[0] = append0[3] >> 16 | append1[0] << 16; + w2[1] = append1[0] >> 16 | append1[1] << 16; + w2[2] = append1[1] >> 16 | append1[2] << 16; + w2[3] = append1[2] >> 16 | append1[3] << 16; + w3[0] = append1[3] >> 16; + break; + + case 19: + w1[0] = w1[0] | append0[0] << 24; + w1[1] = append0[0] >> 8 | append0[1] << 24; + w1[2] = append0[1] >> 8 | append0[2] << 24; + w1[3] = append0[2] >> 8 | append0[3] << 24; + w2[0] = append0[3] >> 8 | append1[0] << 24; + w2[1] = append1[0] >> 8 | append1[1] << 24; + w2[2] = append1[1] >> 8 | append1[2] << 24; + w2[3] = append1[2] >> 8 | append1[3] << 24; + w3[0] = append1[3] >> 8; + break; + + case 20: + w1[1] = append0[0]; + w1[2] = append0[1]; + w1[3] = append0[2]; + w2[0] = append0[3]; + w2[1] = append1[0]; + w2[2] = append1[1]; + w2[3] = append1[2]; + w3[0] = append1[3]; + break; + + case 21: + w1[1] = w1[1] | append0[0] << 8; + w1[2] = append0[0] >> 24 | append0[1] << 8; + w1[3] = append0[1] >> 24 | append0[2] << 8; + w2[0] = append0[2] >> 24 | append0[3] << 8; + w2[1] = append0[3] >> 24 | append1[0] << 8; + w2[2] = append1[0] >> 24 | append1[1] << 8; + w2[3] = append1[1] >> 24 | append1[2] << 8; + w3[0] = append1[2] >> 24 | append1[3] << 8; + w3[1] = append1[3] >> 24; + break; + + case 22: + w1[1] = w1[1] | append0[0] << 16; + w1[2] = append0[0] >> 16 | append0[1] << 16; + w1[3] = append0[1] >> 16 | append0[2] << 16; + w2[0] = append0[2] >> 16 | append0[3] << 16; + w2[1] = append0[3] >> 16 | append1[0] << 16; + w2[2] = append1[0] >> 16 | append1[1] << 16; + w2[3] = append1[1] >> 16 | append1[2] << 16; + w3[0] = append1[2] >> 16 | append1[3] << 16; + w3[1] = append1[3] >> 16; + break; + + case 23: + w1[1] = w1[1] | append0[0] << 24; 
+ w1[2] = append0[0] >> 8 | append0[1] << 24; + w1[3] = append0[1] >> 8 | append0[2] << 24; + w2[0] = append0[2] >> 8 | append0[3] << 24; + w2[1] = append0[3] >> 8 | append1[0] << 24; + w2[2] = append1[0] >> 8 | append1[1] << 24; + w2[3] = append1[1] >> 8 | append1[2] << 24; + w3[0] = append1[2] >> 8 | append1[3] << 24; + w3[1] = append1[3] >> 8; + break; + + case 24: + w1[2] = append0[0]; + w1[3] = append0[1]; + w2[0] = append0[2]; + w2[1] = append0[3]; + w2[2] = append1[0]; + w2[3] = append1[1]; + w3[0] = append1[2]; + w3[1] = append1[3]; + break; + + case 25: + w1[2] = w1[2] | append0[0] << 8; + w1[3] = append0[0] >> 24 | append0[1] << 8; + w2[0] = append0[1] >> 24 | append0[2] << 8; + w2[1] = append0[2] >> 24 | append0[3] << 8; + w2[2] = append0[3] >> 24 | append1[0] << 8; + w2[3] = append1[0] >> 24 | append1[1] << 8; + w3[0] = append1[1] >> 24 | append1[2] << 8; + w3[1] = append1[2] >> 24 | append1[3] << 8; + break; + + case 26: + w1[2] = w1[2] | append0[0] << 16; + w1[3] = append0[0] >> 16 | append0[1] << 16; + w2[0] = append0[1] >> 16 | append0[2] << 16; + w2[1] = append0[2] >> 16 | append0[3] << 16; + w2[2] = append0[3] >> 16 | append1[0] << 16; + w2[3] = append1[0] >> 16 | append1[1] << 16; + w3[0] = append1[1] >> 16 | append1[2] << 16; + w3[1] = append1[2] >> 16 | append1[3] << 16; + break; + + case 27: + w1[2] = w1[2] | append0[0] << 24; + w1[3] = append0[0] >> 8 | append0[1] << 24; + w2[0] = append0[1] >> 8 | append0[2] << 24; + w2[1] = append0[2] >> 8 | append0[3] << 24; + w2[2] = append0[3] >> 8 | append1[0] << 24; + w2[3] = append1[0] >> 8 | append1[1] << 24; + w3[0] = append1[1] >> 8 | append1[2] << 24; + w3[1] = append1[2] >> 8 | append1[3] << 24; + break; + + case 28: + w1[3] = append0[0]; + w2[0] = append0[1]; + w2[1] = append0[2]; + w2[2] = append0[3]; + w2[3] = append1[0]; + w3[0] = append1[1]; + w3[1] = append1[2]; + break; + + case 29: + w1[3] = w1[3] | append0[0] << 8; + w2[0] = append0[0] >> 24 | append0[1] << 8; + w2[1] = append0[1] >> 24 
/*
 * NOTE(review): this region is a flattened git-format-patch hunk (hashcat
 * OpenCL common code); the interspersed '+' tokens are diff markers, not C.
 * The first tokens below finish the preceding (partially visible) memcat
 * variant. Then memcat_c32_w4x4_a3x4 (renamed from memcat32_9 per the
 * commit message) begins: it appends the 36-byte chunk append0[0..3],
 * append1[0..3], append2[0] into the 64-byte block w0..w3 at byte position
 * `offset` (0..32), fully unrolled with one switch case per offset. For a
 * word-aligned offset the words are assigned directly; otherwise the first
 * touched word is OR-merged (low bytes preserved) and every following word
 * is rebuilt as `prev >> (32-s) | next << s` with s = (offset&3)*8
 * (little-endian byte packing). Words w3[2]/w3[3] are never written —
 * presumably reserved for the padding length words; TODO confirm in callers.
 */
| append0[2] << 8; + w2[2] = append0[2] >> 24 | append0[3] << 8; + w2[3] = append0[3] >> 24 | append1[0] << 8; + w3[0] = append1[0] >> 24 | append1[1] << 8; + w3[1] = append1[1] >> 24 | append1[2] << 8; + break; + + case 30: + w1[3] = w1[3] | append0[0] << 16; + w2[0] = append0[0] >> 16 | append0[1] << 16; + w2[1] = append0[1] >> 16 | append0[2] << 16; + w2[2] = append0[2] >> 16 | append0[3] << 16; + w2[3] = append0[3] >> 16 | append1[0] << 16; + w3[0] = append1[0] >> 16 | append1[1] << 16; + w3[1] = append1[1] >> 16 | append1[2] << 16; + break; + + case 31: + w1[3] = w1[3] | append0[0] << 24; + w2[0] = append0[0] >> 8 | append0[1] << 24; + w2[1] = append0[1] >> 8 | append0[2] << 24; + w2[2] = append0[2] >> 8 | append0[3] << 24; + w2[3] = append0[3] >> 8 | append1[0] << 24; + w3[0] = append1[0] >> 8 | append1[1] << 24; + w3[1] = append1[1] >> 8 | append1[2] << 24; + break; + + case 32: + w2[0] = append0[0]; + w2[1] = append0[1]; + w2[2] = append0[2]; + w2[3] = append0[3]; + w3[0] = append1[0]; + w3[1] = append1[1]; + break; + } +} + +// before: memcat32_9 +static void memcat_c32_w4x4_a3x4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) +{ + switch (offset) + { + case 0: + w0[0] = append0[0]; + w0[1] = append0[1]; + w0[2] = append0[2]; + w0[3] = append0[3]; + w1[0] = append1[0]; + w1[1] = append1[1]; + w1[2] = append1[2]; + w1[3] = append1[3]; + w2[0] = append2[0]; + break; + + case 1: + w0[0] = w0[0] | append0[0] << 8; + w0[1] = append0[0] >> 24 | append0[1] << 8; + w0[2] = append0[1] >> 24 | append0[2] << 8; + w0[3] = append0[2] >> 24 | append0[3] << 8; + w1[0] = append0[3] >> 24 | append1[0] << 8; + w1[1] = append1[0] >> 24 | append1[1] << 8; + w1[2] = append1[1] >> 24 | append1[2] << 8; + w1[3] = append1[2] >> 24 | append1[3] << 8; + w2[0] = append1[3] >> 24 | append2[0] << 8; + w2[1] = append2[0] >> 24; + break; + + case 2: + w0[0] = w0[0] | append0[0] << 16; + w0[1] = append0[0]
/* cases 2..7: misaligned offsets — carry bytes across word boundaries via shift pairs */
>> 16 | append0[1] << 16; + w0[2] = append0[1] >> 16 | append0[2] << 16; + w0[3] = append0[2] >> 16 | append0[3] << 16; + w1[0] = append0[3] >> 16 | append1[0] << 16; + w1[1] = append1[0] >> 16 | append1[1] << 16; + w1[2] = append1[1] >> 16 | append1[2] << 16; + w1[3] = append1[2] >> 16 | append1[3] << 16; + w2[0] = append1[3] >> 16 | append2[0] << 16; + w2[1] = append2[0] >> 16; + break; + + case 3: + w0[0] = w0[0] | append0[0] << 24; + w0[1] = append0[0] >> 8 | append0[1] << 24; + w0[2] = append0[1] >> 8 | append0[2] << 24; + w0[3] = append0[2] >> 8 | append0[3] << 24; + w1[0] = append0[3] >> 8 | append1[0] << 24; + w1[1] = append1[0] >> 8 | append1[1] << 24; + w1[2] = append1[1] >> 8 | append1[2] << 24; + w1[3] = append1[2] >> 8 | append1[3] << 24; + w2[0] = append1[3] >> 8 | append2[0] << 24; + w2[1] = append2[0] >> 8; + break; + + case 4: + w0[1] = append0[0]; + w0[2] = append0[1]; + w0[3] = append0[2]; + w1[0] = append0[3]; + w1[1] = append1[0]; + w1[2] = append1[1]; + w1[3] = append1[2]; + w2[0] = append1[3]; + w2[1] = append2[0]; + break; + + case 5: + w0[1] = w0[1] | append0[0] << 8; + w0[2] = append0[0] >> 24 | append0[1] << 8; + w0[3] = append0[1] >> 24 | append0[2] << 8; + w1[0] = append0[2] >> 24 | append0[3] << 8; + w1[1] = append0[3] >> 24 | append1[0] << 8; + w1[2] = append1[0] >> 24 | append1[1] << 8; + w1[3] = append1[1] >> 24 | append1[2] << 8; + w2[0] = append1[2] >> 24 | append1[3] << 8; + w2[1] = append1[3] >> 24 | append2[0] << 8; + w2[2] = append2[0] >> 24; + break; + + case 6: + w0[1] = w0[1] | append0[0] << 16; + w0[2] = append0[0] >> 16 | append0[1] << 16; + w0[3] = append0[1] >> 16 | append0[2] << 16; + w1[0] = append0[2] >> 16 | append0[3] << 16; + w1[1] = append0[3] >> 16 | append1[0] << 16; + w1[2] = append1[0] >> 16 | append1[1] << 16; + w1[3] = append1[1] >> 16 | append1[2] << 16; + w2[0] = append1[2] >> 16 | append1[3] << 16; + w2[1] = append1[3] >> 16 | append2[0] << 16; + w2[2] = append2[0] >> 16; + break; + + case 7: + w0[1] =
w0[1] | append0[0] << 24; + w0[2] = append0[0] >> 8 | append0[1] << 24; + w0[3] = append0[1] >> 8 | append0[2] << 24; + w1[0] = append0[2] >> 8 | append0[3] << 24; + w1[1] = append0[3] >> 8 | append1[0] << 24; + w1[2] = append1[0] >> 8 | append1[1] << 24; + w1[3] = append1[1] >> 8 | append1[2] << 24; + w2[0] = append1[2] >> 8 | append1[3] << 24; + w2[1] = append1[3] >> 8 | append2[0] << 24; + w2[2] = append2[0] >> 8; + break; + + case 8: + w0[2] = append0[0]; + w0[3] = append0[1]; + w1[0] = append0[2]; + w1[1] = append0[3]; + w1[2] = append1[0]; + w1[3] = append1[1]; + w2[0] = append1[2]; + w2[1] = append1[3]; + w2[2] = append2[0]; + break; + + case 9: + w0[2] = w0[2] | append0[0] << 8; + w0[3] = append0[0] >> 24 | append0[1] << 8; + w1[0] = append0[1] >> 24 | append0[2] << 8; + w1[1] = append0[2] >> 24 | append0[3] << 8; + w1[2] = append0[3] >> 24 | append1[0] << 8; + w1[3] = append1[0] >> 24 | append1[1] << 8; + w2[0] = append1[1] >> 24 | append1[2] << 8; + w2[1] = append1[2] >> 24 | append1[3] << 8; + w2[2] = append1[3] >> 24 | append2[0] << 8; + w2[3] = append2[0] >> 24; + break; + + case 10: + w0[2] = w0[2] | append0[0] << 16; + w0[3] = append0[0] >> 16 | append0[1] << 16; + w1[0] = append0[1] >> 16 | append0[2] << 16; + w1[1] = append0[2] >> 16 | append0[3] << 16; + w1[2] = append0[3] >> 16 | append1[0] << 16; + w1[3] = append1[0] >> 16 | append1[1] << 16; + w2[0] = append1[1] >> 16 | append1[2] << 16; + w2[1] = append1[2] >> 16 | append1[3] << 16; + w2[2] = append1[3] >> 16 | append2[0] << 16; + w2[3] = append2[0] >> 16; + break; + + case 11: + w0[2] = w0[2] | append0[0] << 24; + w0[3] = append0[0] >> 8 | append0[1] << 24; + w1[0] = append0[1] >> 8 | append0[2] << 24; + w1[1] = append0[2] >> 8 | append0[3] << 24; + w1[2] = append0[3] >> 8 | append1[0] << 24; + w1[3] = append1[0] >> 8 | append1[1] << 24; + w2[0] = append1[1] >> 8 | append1[2] << 24; + w2[1] = append1[2] >> 8 | append1[3] << 24; + w2[2] = append1[3] >> 8 | append2[0] << 24; + w2[3] =
append2[0] >> 8; + break; + + case 12: + w0[3] = append0[0]; + w1[0] = append0[1]; + w1[1] = append0[2]; + w1[2] = append0[3]; + w1[3] = append1[0]; + w2[0] = append1[1]; + w2[1] = append1[2]; + w2[2] = append1[3]; + w2[3] = append2[0]; + break; + + case 13: + w0[3] = w0[3] | append0[0] << 8; + w1[0] = append0[0] >> 24 | append0[1] << 8; + w1[1] = append0[1] >> 24 | append0[2] << 8; + w1[2] = append0[2] >> 24 | append0[3] << 8; + w1[3] = append0[3] >> 24 | append1[0] << 8; + w2[0] = append1[0] >> 24 | append1[1] << 8; + w2[1] = append1[1] >> 24 | append1[2] << 8; + w2[2] = append1[2] >> 24 | append1[3] << 8; + w2[3] = append1[3] >> 24 | append2[0] << 8; + w3[0] = append2[0] >> 24; + break; + + case 14: + w0[3] = w0[3] | append0[0] << 16; + w1[0] = append0[0] >> 16 | append0[1] << 16; + w1[1] = append0[1] >> 16 | append0[2] << 16; + w1[2] = append0[2] >> 16 | append0[3] << 16; + w1[3] = append0[3] >> 16 | append1[0] << 16; + w2[0] = append1[0] >> 16 | append1[1] << 16; + w2[1] = append1[1] >> 16 | append1[2] << 16; + w2[2] = append1[2] >> 16 | append1[3] << 16; + w2[3] = append1[3] >> 16 | append2[0] << 16; + w3[0] = append2[0] >> 16; + break; + + case 15: + w0[3] = w0[3] | append0[0] << 24; + w1[0] = append0[0] >> 8 | append0[1] << 24; + w1[1] = append0[1] >> 8 | append0[2] << 24; + w1[2] = append0[2] >> 8 | append0[3] << 24; + w1[3] = append0[3] >> 8 | append1[0] << 24; + w2[0] = append1[0] >> 8 | append1[1] << 24; + w2[1] = append1[1] >> 8 | append1[2] << 24; + w2[2] = append1[2] >> 8 | append1[3] << 24; + w2[3] = append1[3] >> 8 | append2[0] << 24; + w3[0] = append2[0] >> 8; + break; + + case 16: + w1[0] = append0[0]; + w1[1] = append0[1]; + w1[2] = append0[2]; + w1[3] = append0[3]; + w2[0] = append1[0]; + w2[1] = append1[1]; + w2[2] = append1[2]; + w2[3] = append1[3]; + w3[0] = append2[0]; + break; + + case 17: + w1[0] = w1[0] | append0[0] << 8; + w1[1] = append0[0] >> 24 | append0[1] << 8; + w1[2] = append0[1] >> 24 | append0[2] << 8; + w1[3] = append0[2] >>
/* cases 17..27 continue the same pattern one destination word further along */
24 | append0[3] << 8; + w2[0] = append0[3] >> 24 | append1[0] << 8; + w2[1] = append1[0] >> 24 | append1[1] << 8; + w2[2] = append1[1] >> 24 | append1[2] << 8; + w2[3] = append1[2] >> 24 | append1[3] << 8; + w3[0] = append1[3] >> 24 | append2[0] << 8; + w3[1] = append2[0] >> 24; + break; + + case 18: + w1[0] = w1[0] | append0[0] << 16; + w1[1] = append0[0] >> 16 | append0[1] << 16; + w1[2] = append0[1] >> 16 | append0[2] << 16; + w1[3] = append0[2] >> 16 | append0[3] << 16; + w2[0] = append0[3] >> 16 | append1[0] << 16; + w2[1] = append1[0] >> 16 | append1[1] << 16; + w2[2] = append1[1] >> 16 | append1[2] << 16; + w2[3] = append1[2] >> 16 | append1[3] << 16; + w3[0] = append1[3] >> 16 | append2[0] << 16; + w3[1] = append2[0] >> 16; + break; + + case 19: + w1[0] = w1[0] | append0[0] << 24; + w1[1] = append0[0] >> 8 | append0[1] << 24; + w1[2] = append0[1] >> 8 | append0[2] << 24; + w1[3] = append0[2] >> 8 | append0[3] << 24; + w2[0] = append0[3] >> 8 | append1[0] << 24; + w2[1] = append1[0] >> 8 | append1[1] << 24; + w2[2] = append1[1] >> 8 | append1[2] << 24; + w2[3] = append1[2] >> 8 | append1[3] << 24; + w3[0] = append1[3] >> 8 | append2[0] << 24; + w3[1] = append2[0] >> 8; + break; + + case 20: + w1[1] = append0[0]; + w1[2] = append0[1]; + w1[3] = append0[2]; + w2[0] = append0[3]; + w2[1] = append1[0]; + w2[2] = append1[1]; + w2[3] = append1[2]; + w3[0] = append1[3]; + w3[1] = append2[0]; + break; + + case 21: + w1[1] = w1[1] | append0[0] << 8; + w1[2] = append0[0] >> 24 | append0[1] << 8; + w1[3] = append0[1] >> 24 | append0[2] << 8; + w2[0] = append0[2] >> 24 | append0[3] << 8; + w2[1] = append0[3] >> 24 | append1[0] << 8; + w2[2] = append1[0] >> 24 | append1[1] << 8; + w2[3] = append1[1] >> 24 | append1[2] << 8; + w3[0] = append1[2] >> 24 | append1[3] << 8; + w3[1] = append1[3] >> 24 | append2[0] << 8; + break; + + case 22: + w1[1] = w1[1] | append0[0] << 16; + w1[2] = append0[0] >> 16 | append0[1] << 16; + w1[3] = append0[1] >> 16 | append0[2] << 16; + w2[0]
= append0[2] >> 16 | append0[3] << 16; + w2[1] = append0[3] >> 16 | append1[0] << 16; + w2[2] = append1[0] >> 16 | append1[1] << 16; + w2[3] = append1[1] >> 16 | append1[2] << 16; + w3[0] = append1[2] >> 16 | append1[3] << 16; + w3[1] = append1[3] >> 16 | append2[0] << 16; + break; + + case 23: + w1[1] = w1[1] | append0[0] << 24; + w1[2] = append0[0] >> 8 | append0[1] << 24; + w1[3] = append0[1] >> 8 | append0[2] << 24; + w2[0] = append0[2] >> 8 | append0[3] << 24; + w2[1] = append0[3] >> 8 | append1[0] << 24; + w2[2] = append1[0] >> 8 | append1[1] << 24; + w2[3] = append1[1] >> 8 | append1[2] << 24; + w3[0] = append1[2] >> 8 | append1[3] << 24; + w3[1] = append1[3] >> 8 | append2[0] << 24; + break; + + case 24: + w1[2] = append0[0]; + w1[3] = append0[1]; + w2[0] = append0[2]; + w2[1] = append0[3]; + w2[2] = append1[0]; + w2[3] = append1[1]; + w3[0] = append1[2]; + w3[1] = append1[3]; + break; + + case 25: + w1[2] = w1[2] | append0[0] << 8; + w1[3] = append0[0] >> 24 | append0[1] << 8; + w2[0] = append0[1] >> 24 | append0[2] << 8; + w2[1] = append0[2] >> 24 | append0[3] << 8; + w2[2] = append0[3] >> 24 | append1[0] << 8; + w2[3] = append1[0] >> 24 | append1[1] << 8; + w3[0] = append1[1] >> 24 | append1[2] << 8; + w3[1] = append1[2] >> 24 | append1[3] << 8; + break; + + case 26: + w1[2] = w1[2] | append0[0] << 16; + w1[3] = append0[0] >> 16 | append0[1] << 16; + w2[0] = append0[1] >> 16 | append0[2] << 16; + w2[1] = append0[2] >> 16 | append0[3] << 16; + w2[2] = append0[3] >> 16 | append1[0] << 16; + w2[3] = append1[0] >> 16 | append1[1] << 16; + w3[0] = append1[1] >> 16 | append1[2] << 16; + w3[1] = append1[2] >> 16 | append1[3] << 16; + break; + + case 27: + w1[2] = w1[2] | append0[0] << 24; + w1[3] = append0[0] >> 8 | append0[1] << 24; + w2[0] = append0[1] >> 8 | append0[2] << 24; + w2[1] = append0[2] >> 8 | append0[3] << 24; + w2[2] = append0[3] >> 8 | append1[0] << 24; + w2[3] = append1[0] >> 8 | append1[1] << 24; + w3[0] = append1[1] >> 8 | append1[2] << 24; +
/*
 * NOTE(review): continuation of the flattened diff hunk ('+' tokens are diff
 * markers). The first tokens finish memcat_c32_w4x4_a3x4 (cases 27..32; the
 * high offsets write fewer words because the append is clipped at the end of
 * the w0..w3 block). Then switch_buffer_by_offset: shifts the 64-byte buffer
 * w0..w3 towards higher addresses by `offset` bytes, zero-filling the front —
 * with two compile-time code paths selected by the host (per the commit
 * message, data.vendor_id drives IS_AMD / IS_NV):
 *   - IS_AMD: amd_bytealign with align = 4 - offset; amd_bytealign only uses
 *     the low 2 bits of the align operand, so the explicit
 *     `offset_mod_4 == 0` word-rotation fixup handles the aligned case.
 *   - IS_NV: __byte_perm with a selector nibble-extracted from 0x76543210
 *     (here the modulo is taken up front: 4 - (offset % 4)), so no fixup
 *     pass is needed.
 * Note the AMD path also produces w3[2] as a scratch/carry word while the NV
 * path stops at w3[1] — presumably callers ignore w3[2..3]; TODO confirm.
 * The tail of this region begins switch_buffer_by_offset_be (big-endian
 * variant, amd_bytealign with operands swapped), which continues past this
 * hunk.
 */
w3[1] = append1[2] >> 8 | append1[3] << 24; + break; + + case 28: + w1[3] = append0[0]; + w2[0] = append0[1]; + w2[1] = append0[2]; + w2[2] = append0[3]; + w2[3] = append1[0]; + w3[0] = append1[1]; + w3[1] = append1[2]; + break; + + case 29: + w1[3] = w1[3] | append0[0] << 8; + w2[0] = append0[0] >> 24 | append0[1] << 8; + w2[1] = append0[1] >> 24 | append0[2] << 8; + w2[2] = append0[2] >> 24 | append0[3] << 8; + w2[3] = append0[3] >> 24 | append1[0] << 8; + w3[0] = append1[0] >> 24 | append1[1] << 8; + w3[1] = append1[1] >> 24 | append1[2] << 8; + break; + + case 30: + w1[3] = w1[3] | append0[0] << 16; + w2[0] = append0[0] >> 16 | append0[1] << 16; + w2[1] = append0[1] >> 16 | append0[2] << 16; + w2[2] = append0[2] >> 16 | append0[3] << 16; + w2[3] = append0[3] >> 16 | append1[0] << 16; + w3[0] = append1[0] >> 16 | append1[1] << 16; + w3[1] = append1[1] >> 16 | append1[2] << 16; + break; + + case 31: + w1[3] = w1[3] | append0[0] << 24; + w2[0] = append0[0] >> 8 | append0[1] << 24; + w2[1] = append0[1] >> 8 | append0[2] << 24; + w2[2] = append0[2] >> 8 | append0[3] << 24; + w2[3] = append0[3] >> 8 | append1[0] << 24; + w3[0] = append1[0] >> 8 | append1[1] << 24; + w3[1] = append1[1] >> 8 | append1[2] << 24; + break; + + case 32: + w2[0] = append0[0]; + w2[1] = append0[1]; + w2[2] = append0[2]; + w2[3] = append0[3]; + w3[0] = append1[0]; + w3[1] = append1[1]; + break; + } +} + +static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) +{ + #ifdef IS_AMD + const int offset_mod_4 = offset & 3; + + const int offset_minus_4 = 4 - offset; + + switch (offset / 4) + { + case 0: + w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4); + w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4); + w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4); + w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4); + w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4); + w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); + w2[0] = amd_bytealign
/* AMD path: each case N shifts the whole buffer by N words plus the sub-word byte amount */
(w2[0], w1[3], offset_minus_4); + w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w0[0] = amd_bytealign (w0[0], 0, offset_minus_4); + + if (offset_mod_4 == 0) + { + w0[0] = w0[1]; + w0[1] = w0[2]; + w0[2] = w0[3]; + w0[3] = w1[0]; + w1[0] = w1[1]; + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 1: + w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4); + w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4); + w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4); + w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4); + w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4); + w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); + w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w0[1] = amd_bytealign (w0[0], 0, offset_minus_4); + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w0[1] = w0[2]; + w0[2] = w0[3]; + w0[3] = w1[0]; + w1[0] = w1[1]; + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 2: + w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4); + w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4); + w3[0] = amd_bytealign
(w2[2], w2[1], offset_minus_4); + w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4); + w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4); + w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w0[2] = amd_bytealign (w0[0], 0, offset_minus_4); + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w0[2] = w0[3]; + w0[3] = w1[0]; + w1[0] = w1[1]; + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 3: + w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4); + w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4); + w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4); + w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4); + w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w0[3] = amd_bytealign (w0[0], 0, offset_minus_4); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w0[3] = w1[0]; + w1[0] = w1[1]; + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 4: + w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4); + w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); + w3[0] = amd_bytealign (w2[0], w1[3],
offset_minus_4); + w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w1[0] = amd_bytealign (w0[0], 0, offset_minus_4); + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w1[0] = w1[1]; + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 5: + w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4); + w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); + w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w1[1] = amd_bytealign (w0[0], 0, offset_minus_4); + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w1[1] = w1[2]; + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 6: + w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4); + w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); + w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w1[3] =
amd_bytealign (w0[1], w0[0], offset_minus_4); + w1[2] = amd_bytealign (w0[0], 0, offset_minus_4); + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w1[2] = w1[3]; + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 7: + w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4); + w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); + w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w1[3] = amd_bytealign (w0[0], 0, offset_minus_4); + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w1[3] = w2[0]; + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 8: + w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4); + w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); + w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w2[0] = amd_bytealign (w0[0], 0, offset_minus_4); + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w2[0] = w2[1]; + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 9: + w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4); + w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); + w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w2[2] =
amd_bytealign (w0[1], w0[0], offset_minus_4); + w2[1] = amd_bytealign (w0[0], 0, offset_minus_4); + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w2[1] = w2[2]; + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 10: + w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4); + w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); + w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w2[2] = amd_bytealign (w0[0], 0, offset_minus_4); + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w2[2] = w2[3]; + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 11: + w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4); + w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); + w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w2[3] = amd_bytealign (w0[0], 0, offset_minus_4); + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w2[3] = w3[0]; + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 12: + w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4); + w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); + w3[0] = amd_bytealign (w0[0], 0, offset_minus_4); + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w3[0] = w3[1]; + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + + case 13: + w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4); + w3[1] = amd_bytealign (w0[0], 0, offset_minus_4); + w3[0] = 0; + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3]
= 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + if (offset_mod_4 == 0) + { + w3[1] = w3[2]; + w3[2] = 0; + } + + break; + } + #endif + + #ifdef IS_NV + const int offset_minus_4 = 4 - (offset % 4); + + const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + + switch (offset / 4) + { + case 0: + w3[1] = __byte_perm (w3[0], w3[1], selector); + w3[0] = __byte_perm (w2[3], w3[0], selector); + w2[3] = __byte_perm (w2[2], w2[3], selector); + w2[2] = __byte_perm (w2[1], w2[2], selector); + w2[1] = __byte_perm (w2[0], w2[1], selector); + w2[0] = __byte_perm (w1[3], w2[0], selector); + w1[3] = __byte_perm (w1[2], w1[3], selector); + w1[2] = __byte_perm (w1[1], w1[2], selector); + w1[1] = __byte_perm (w1[0], w1[1], selector); + w1[0] = __byte_perm (w0[3], w1[0], selector); + w0[3] = __byte_perm (w0[2], w0[3], selector); + w0[2] = __byte_perm (w0[1], w0[2], selector); + w0[1] = __byte_perm (w0[0], w0[1], selector); + w0[0] = __byte_perm ( 0, w0[0], selector); + + break; + + case 1: + w3[1] = __byte_perm (w2[3], w3[0], selector); + w3[0] = __byte_perm (w2[2], w2[3], selector); + w2[3] = __byte_perm (w2[1], w2[2], selector); + w2[2] = __byte_perm (w2[0], w2[1], selector); + w2[1] = __byte_perm (w1[3], w2[0], selector); + w2[0] = __byte_perm (w1[2], w1[3], selector); + w1[3] = __byte_perm (w1[1], w1[2], selector); + w1[2] = __byte_perm (w1[0], w1[1], selector); + w1[1] = __byte_perm (w0[3], w1[0], selector); + w1[0] = __byte_perm (w0[2], w0[3], selector); + w0[3] = __byte_perm (w0[1], w0[2], selector); + w0[2] = __byte_perm (w0[0], w0[1], selector); + w0[1] = __byte_perm ( 0, w0[0], selector); + w0[0] = 0; + + break; + + case 2: + w3[1] = __byte_perm (w2[2], w2[3], selector); + w3[0] = __byte_perm (w2[1], w2[2], selector); + w2[3] = __byte_perm (w2[0], w2[1], selector); + w2[2] = __byte_perm (w1[3], w2[0], selector); + w2[1] = __byte_perm (w1[2], w1[3], selector); + w2[0] = __byte_perm (w1[1], w1[2], selector); +
/* NV path: no fixup needed — selector already encodes the byte rotation */
w1[3] = __byte_perm (w1[0], w1[1], selector); + w1[2] = __byte_perm (w0[3], w1[0], selector); + w1[1] = __byte_perm (w0[2], w0[3], selector); + w1[0] = __byte_perm (w0[1], w0[2], selector); + w0[3] = __byte_perm (w0[0], w0[1], selector); + w0[2] = __byte_perm ( 0, w0[0], selector); + w0[1] = 0; + w0[0] = 0; + + break; + + case 3: + w3[1] = __byte_perm (w2[1], w2[2], selector); + w3[0] = __byte_perm (w2[0], w2[1], selector); + w2[3] = __byte_perm (w1[3], w2[0], selector); + w2[2] = __byte_perm (w1[2], w1[3], selector); + w2[1] = __byte_perm (w1[1], w1[2], selector); + w2[0] = __byte_perm (w1[0], w1[1], selector); + w1[3] = __byte_perm (w0[3], w1[0], selector); + w1[2] = __byte_perm (w0[2], w0[3], selector); + w1[1] = __byte_perm (w0[1], w0[2], selector); + w1[0] = __byte_perm (w0[0], w0[1], selector); + w0[3] = __byte_perm ( 0, w0[0], selector); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 4: + w3[1] = __byte_perm (w2[0], w2[1], selector); + w3[0] = __byte_perm (w1[3], w2[0], selector); + w2[3] = __byte_perm (w1[2], w1[3], selector); + w2[2] = __byte_perm (w1[1], w1[2], selector); + w2[1] = __byte_perm (w1[0], w1[1], selector); + w2[0] = __byte_perm (w0[3], w1[0], selector); + w1[3] = __byte_perm (w0[2], w0[3], selector); + w1[2] = __byte_perm (w0[1], w0[2], selector); + w1[1] = __byte_perm (w0[0], w0[1], selector); + w1[0] = __byte_perm ( 0, w0[0], selector); + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 5: + w3[1] = __byte_perm (w1[3], w2[0], selector); + w3[0] = __byte_perm (w1[2], w1[3], selector); + w2[3] = __byte_perm (w1[1], w1[2], selector); + w2[2] = __byte_perm (w1[0], w1[1], selector); + w2[1] = __byte_perm (w0[3], w1[0], selector); + w2[0] = __byte_perm (w0[2], w0[3], selector); + w1[3] = __byte_perm (w0[1], w0[2], selector); + w1[2] = __byte_perm (w0[0], w0[1], selector); + w1[1] = __byte_perm ( 0, w0[0], selector); + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 6: + w3[1] =
__byte_perm (w1[2], w1[3], selector); + w3[0] = __byte_perm (w1[1], w1[2], selector); + w2[3] = __byte_perm (w1[0], w1[1], selector); + w2[2] = __byte_perm (w0[3], w1[0], selector); + w2[1] = __byte_perm (w0[2], w0[3], selector); + w2[0] = __byte_perm (w0[1], w0[2], selector); + w1[3] = __byte_perm (w0[0], w0[1], selector); + w1[2] = __byte_perm ( 0, w0[0], selector); + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 7: + w3[1] = __byte_perm (w1[1], w1[2], selector); + w3[0] = __byte_perm (w1[0], w1[1], selector); + w2[3] = __byte_perm (w0[3], w1[0], selector); + w2[2] = __byte_perm (w0[2], w0[3], selector); + w2[1] = __byte_perm (w0[1], w0[2], selector); + w2[0] = __byte_perm (w0[0], w0[1], selector); + w1[3] = __byte_perm ( 0, w0[0], selector); + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 8: + w3[1] = __byte_perm (w1[0], w1[1], selector); + w3[0] = __byte_perm (w0[3], w1[0], selector); + w2[3] = __byte_perm (w0[2], w0[3], selector); + w2[2] = __byte_perm (w0[1], w0[2], selector); + w2[1] = __byte_perm (w0[0], w0[1], selector); + w2[0] = __byte_perm ( 0, w0[0], selector); + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 9: + w3[1] = __byte_perm (w0[3], w1[0], selector); + w3[0] = __byte_perm (w0[2], w0[3], selector); + w2[3] = __byte_perm (w0[1], w0[2], selector); + w2[2] = __byte_perm (w0[0], w0[1], selector); + w2[1] = __byte_perm ( 0, w0[0], selector); + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 10: + w3[1] = __byte_perm (w0[2], w0[3], selector); + w3[0] = __byte_perm (w0[1], w0[2], selector); + w2[3] = __byte_perm (w0[0], w0[1], selector); + w2[2] = __byte_perm ( 0, w0[0], selector); + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] =
0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 11: + w3[1] = __byte_perm (w0[1], w0[2], selector); + w3[0] = __byte_perm (w0[0], w0[1], selector); + w2[3] = __byte_perm ( 0, w0[0], selector); + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 12: + w3[1] = __byte_perm (w0[0], w0[1], selector); + w3[0] = __byte_perm ( 0, w0[0], selector); + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + + case 13: + w3[1] = __byte_perm ( 0, w0[0], selector); + w3[0] = 0; + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + + break; + } + #endif +} + +static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) +{ + #ifdef IS_AMD + switch (offset / 4) + { + case 0: + w3[2] = amd_bytealign (w3[1], 0, offset); + w3[1] = amd_bytealign (w3[0], w3[1], offset); + w3[0] = amd_bytealign (w2[3], w3[0], offset); + w2[3] = amd_bytealign (w2[2], w2[3], offset); + w2[2] = amd_bytealign (w2[1], w2[2], offset); + w2[1] = amd_bytealign (w2[0], w2[1], offset); + w2[0] = amd_bytealign (w1[3], w2[0], offset); + w1[3] = amd_bytealign (w1[2], w1[3], offset); + w1[2] = amd_bytealign (w1[1], w1[2], offset); + w1[1] = amd_bytealign (w1[0], w1[1], offset); + w1[0] = amd_bytealign (w0[3], w1[0], offset); + w0[3] = amd_bytealign (w0[2], w0[3], offset); + w0[2] = amd_bytealign (w0[1], w0[2], offset); + w0[1] = amd_bytealign (w0[0], w0[1], offset); + w0[0] = amd_bytealign ( 0, w0[0], offset); + break; + + case 1: + w3[2] = amd_bytealign (w3[0], 0, offset); + w3[1] = amd_bytealign (w2[3], w3[0], offset); + w3[0] = amd_bytealign (w2[2], w2[3], offset); + w2[3] = amd_bytealign (w2[1], w2[2], offset); + w2[2] = amd_bytealign (w2[0], w2[1],
offset); + w2[1] = amd_bytealign (w1[3], w2[0], offset); + w2[0] = amd_bytealign (w1[2], w1[3], offset); + w1[3] = amd_bytealign (w1[1], w1[2], offset); + w1[2] = amd_bytealign (w1[0], w1[1], offset); + w1[1] = amd_bytealign (w0[3], w1[0], offset); + w1[0] = amd_bytealign (w0[2], w0[3], offset); + w0[3] = amd_bytealign (w0[1], w0[2], offset); + w0[2] = amd_bytealign (w0[0], w0[1], offset); + w0[1] = amd_bytealign ( 0, w0[0], offset); + w0[0] = 0; + break; + + case 2: + w3[2] = amd_bytealign (w2[3], 0, offset); + w3[1] = amd_bytealign (w2[2], w2[3], offset); + w3[0] = amd_bytealign (w2[1], w2[2], offset); + w2[3] = amd_bytealign (w2[0], w2[1], offset); + w2[2] = amd_bytealign (w1[3], w2[0], offset); + w2[1] = amd_bytealign (w1[2], w1[3], offset); + w2[0] = amd_bytealign (w1[1], w1[2], offset); + w1[3] = amd_bytealign (w1[0], w1[1], offset); + w1[2] = amd_bytealign (w0[3], w1[0], offset); + w1[1] = amd_bytealign (w0[2], w0[3], offset); + w1[0] = amd_bytealign (w0[1], w0[2], offset); + w0[3] = amd_bytealign (w0[0], w0[1], offset); + w0[2] = amd_bytealign ( 0, w0[0], offset); + w0[1] = 0; + w0[0] = 0; + break; + + case 3: + w3[2] = amd_bytealign (w2[2], 0, offset); + w3[1] = amd_bytealign (w2[1], w2[2], offset); + w3[0] = amd_bytealign (w2[0], w2[1], offset); + w2[3] = amd_bytealign (w1[3], w2[0], offset); + w2[2] = amd_bytealign (w1[2], w1[3], offset); + w2[1] = amd_bytealign (w1[1], w1[2], offset); + w2[0] = amd_bytealign (w1[0], w1[1], offset); + w1[3] = amd_bytealign (w0[3], w1[0], offset); + w1[2] = amd_bytealign (w0[2], w0[3], offset); + w1[1] = amd_bytealign (w0[1], w0[2], offset); + w1[0] = amd_bytealign (w0[0], w0[1], offset); + w0[3] = amd_bytealign ( 0, w0[0], offset); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 4: + w3[2] = amd_bytealign (w2[1], 0, offset); + w3[1] = amd_bytealign (w2[0], w2[1], offset); + w3[0] = amd_bytealign (w1[3], w2[0], offset); + w2[3] = amd_bytealign (w1[2], w1[3], offset); + w2[2] = amd_bytealign (w1[1], w1[2], 
offset); + w2[1] = amd_bytealign (w1[0], w1[1], offset); + w2[0] = amd_bytealign (w0[3], w1[0], offset); + w1[3] = amd_bytealign (w0[2], w0[3], offset); + w1[2] = amd_bytealign (w0[1], w0[2], offset); + w1[1] = amd_bytealign (w0[0], w0[1], offset); + w1[0] = amd_bytealign ( 0, w0[0], offset); + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 5: + w3[2] = amd_bytealign (w2[0], 0, offset); + w3[1] = amd_bytealign (w1[3], w2[0], offset); + w3[0] = amd_bytealign (w1[2], w1[3], offset); + w2[3] = amd_bytealign (w1[1], w1[2], offset); + w2[2] = amd_bytealign (w1[0], w1[1], offset); + w2[1] = amd_bytealign (w0[3], w1[0], offset); + w2[0] = amd_bytealign (w0[2], w0[3], offset); + w1[3] = amd_bytealign (w0[1], w0[2], offset); + w1[2] = amd_bytealign (w0[0], w0[1], offset); + w1[1] = amd_bytealign ( 0, w0[0], offset); + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 6: + w3[2] = amd_bytealign (w1[3], 0, offset); + w3[1] = amd_bytealign (w1[2], w1[3], offset); + w3[0] = amd_bytealign (w1[1], w1[2], offset); + w2[3] = amd_bytealign (w1[0], w1[1], offset); + w2[2] = amd_bytealign (w0[3], w1[0], offset); + w2[1] = amd_bytealign (w0[2], w0[3], offset); + w2[0] = amd_bytealign (w0[1], w0[2], offset); + w1[3] = amd_bytealign (w0[0], w0[1], offset); + w1[2] = amd_bytealign ( 0, w0[0], offset); + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 7: + w3[2] = amd_bytealign (w1[2], 0, offset); + w3[1] = amd_bytealign (w1[1], w1[2], offset); + w3[0] = amd_bytealign (w1[0], w1[1], offset); + w2[3] = amd_bytealign (w0[3], w1[0], offset); + w2[2] = amd_bytealign (w0[2], w0[3], offset); + w2[1] = amd_bytealign (w0[1], w0[2], offset); + w2[0] = amd_bytealign (w0[0], w0[1], offset); + w1[3] = amd_bytealign ( 0, w0[0], offset); + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 8: + w3[2] = amd_bytealign (w1[1], 0, offset); + w3[1] = 
amd_bytealign (w1[0], w1[1], offset); + w3[0] = amd_bytealign (w0[3], w1[0], offset); + w2[3] = amd_bytealign (w0[2], w0[3], offset); + w2[2] = amd_bytealign (w0[1], w0[2], offset); + w2[1] = amd_bytealign (w0[0], w0[1], offset); + w2[0] = amd_bytealign ( 0, w0[0], offset); + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 9: + w3[2] = amd_bytealign (w1[0], 0, offset); + w3[1] = amd_bytealign (w0[3], w1[0], offset); + w3[0] = amd_bytealign (w0[2], w0[3], offset); + w2[3] = amd_bytealign (w0[1], w0[2], offset); + w2[2] = amd_bytealign (w0[0], w0[1], offset); + w2[1] = amd_bytealign ( 0, w0[0], offset); + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 10: + w3[2] = amd_bytealign (w0[3], 0, offset); + w3[1] = amd_bytealign (w0[2], w0[3], offset); + w3[0] = amd_bytealign (w0[1], w0[2], offset); + w2[3] = amd_bytealign (w0[0], w0[1], offset); + w2[2] = amd_bytealign ( 0, w0[0], offset); + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 11: + w3[2] = amd_bytealign (w0[2], 0, offset); + w3[1] = amd_bytealign (w0[1], w0[2], offset); + w3[0] = amd_bytealign (w0[0], w0[1], offset); + w2[3] = amd_bytealign ( 0, w0[0], offset); + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 12: + w3[2] = amd_bytealign (w0[1], 0, offset); + w3[1] = amd_bytealign (w0[0], w0[1], offset); + w3[0] = amd_bytealign ( 0, w0[0], offset); + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 13: + w3[2] = amd_bytealign (w0[0], 0, offset); + w3[1] = amd_bytealign ( 0, w0[0], offset); + w3[0] = 0; + w2[3] = 0; + w2[2] = 0; + 
w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + } + #endif + + #ifdef IS_NV + const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + + switch (offset / 4) + { + case 0: + w3[1] = __byte_perm (w3[1], w3[0], selector); + w3[0] = __byte_perm (w3[0], w2[3], selector); + w2[3] = __byte_perm (w2[3], w2[2], selector); + w2[2] = __byte_perm (w2[2], w2[1], selector); + w2[1] = __byte_perm (w2[1], w2[0], selector); + w2[0] = __byte_perm (w2[0], w1[3], selector); + w1[3] = __byte_perm (w1[3], w1[2], selector); + w1[2] = __byte_perm (w1[2], w1[1], selector); + w1[1] = __byte_perm (w1[1], w1[0], selector); + w1[0] = __byte_perm (w1[0], w0[3], selector); + w0[3] = __byte_perm (w0[3], w0[2], selector); + w0[2] = __byte_perm (w0[2], w0[1], selector); + w0[1] = __byte_perm (w0[1], w0[0], selector); + w0[0] = __byte_perm (w0[0], 0, selector); + break; + + case 1: + w3[1] = __byte_perm (w3[0], w2[3], selector); + w3[0] = __byte_perm (w2[3], w2[2], selector); + w2[3] = __byte_perm (w2[2], w2[1], selector); + w2[2] = __byte_perm (w2[1], w2[0], selector); + w2[1] = __byte_perm (w2[0], w1[3], selector); + w2[0] = __byte_perm (w1[3], w1[2], selector); + w1[3] = __byte_perm (w1[2], w1[1], selector); + w1[2] = __byte_perm (w1[1], w1[0], selector); + w1[1] = __byte_perm (w1[0], w0[3], selector); + w1[0] = __byte_perm (w0[3], w0[2], selector); + w0[3] = __byte_perm (w0[2], w0[1], selector); + w0[2] = __byte_perm (w0[1], w0[0], selector); + w0[1] = __byte_perm (w0[0], 0, selector); + w0[0] = 0; + break; + + case 2: + w3[1] = __byte_perm (w2[3], w2[2], selector); + w3[0] = __byte_perm (w2[2], w2[1], selector); + w2[3] = __byte_perm (w2[1], w2[0], selector); + w2[2] = __byte_perm (w2[0], w1[3], selector); + w2[1] = __byte_perm (w1[3], w1[2], selector); + w2[0] = __byte_perm (w1[2], w1[1], selector); + w1[3] = __byte_perm (w1[1], w1[0], selector); + w1[2] = __byte_perm (w1[0], w0[3], selector); 
+ w1[1] = __byte_perm (w0[3], w0[2], selector); + w1[0] = __byte_perm (w0[2], w0[1], selector); + w0[3] = __byte_perm (w0[1], w0[0], selector); + w0[2] = __byte_perm (w0[0], 0, selector); + w0[1] = 0; + w0[0] = 0; + break; + + case 3: + w3[1] = __byte_perm (w2[2], w2[1], selector); + w3[0] = __byte_perm (w2[1], w2[0], selector); + w2[3] = __byte_perm (w2[0], w1[3], selector); + w2[2] = __byte_perm (w1[3], w1[2], selector); + w2[1] = __byte_perm (w1[2], w1[1], selector); + w2[0] = __byte_perm (w1[1], w1[0], selector); + w1[3] = __byte_perm (w1[0], w0[3], selector); + w1[2] = __byte_perm (w0[3], w0[2], selector); + w1[1] = __byte_perm (w0[2], w0[1], selector); + w1[0] = __byte_perm (w0[1], w0[0], selector); + w0[3] = __byte_perm (w0[0], 0, selector); + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 4: + w3[1] = __byte_perm (w2[1], w2[0], selector); + w3[0] = __byte_perm (w2[0], w1[3], selector); + w2[3] = __byte_perm (w1[3], w1[2], selector); + w2[2] = __byte_perm (w1[2], w1[1], selector); + w2[1] = __byte_perm (w1[1], w1[0], selector); + w2[0] = __byte_perm (w1[0], w0[3], selector); + w1[3] = __byte_perm (w0[3], w0[2], selector); + w1[2] = __byte_perm (w0[2], w0[1], selector); + w1[1] = __byte_perm (w0[1], w0[0], selector); + w1[0] = __byte_perm (w0[0], 0, selector); + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 5: + w3[1] = __byte_perm (w2[0], w1[3], selector); + w3[0] = __byte_perm (w1[3], w1[2], selector); + w2[3] = __byte_perm (w1[2], w1[1], selector); + w2[2] = __byte_perm (w1[1], w1[0], selector); + w2[1] = __byte_perm (w1[0], w0[3], selector); + w2[0] = __byte_perm (w0[3], w0[2], selector); + w1[3] = __byte_perm (w0[2], w0[1], selector); + w1[2] = __byte_perm (w0[1], w0[0], selector); + w1[1] = __byte_perm (w0[0], 0, selector); + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 6: + w3[1] = __byte_perm (w1[3], w1[2], selector); + w3[0] = __byte_perm (w1[2], w1[1], selector); + w2[3] = __byte_perm 
(w1[1], w1[0], selector); + w2[2] = __byte_perm (w1[0], w0[3], selector); + w2[1] = __byte_perm (w0[3], w0[2], selector); + w2[0] = __byte_perm (w0[2], w0[1], selector); + w1[3] = __byte_perm (w0[1], w0[0], selector); + w1[2] = __byte_perm (w0[0], 0, selector); + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 7: + w3[1] = __byte_perm (w1[2], w1[1], selector); + w3[0] = __byte_perm (w1[1], w1[0], selector); + w2[3] = __byte_perm (w1[0], w0[3], selector); + w2[2] = __byte_perm (w0[3], w0[2], selector); + w2[1] = __byte_perm (w0[2], w0[1], selector); + w2[0] = __byte_perm (w0[1], w0[0], selector); + w1[3] = __byte_perm (w0[0], 0, selector); + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 8: + w3[1] = __byte_perm (w1[1], w1[0], selector); + w3[0] = __byte_perm (w1[0], w0[3], selector); + w2[3] = __byte_perm (w0[3], w0[2], selector); + w2[2] = __byte_perm (w0[2], w0[1], selector); + w2[1] = __byte_perm (w0[1], w0[0], selector); + w2[0] = __byte_perm (w0[0], 0, selector); + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 9: + w3[1] = __byte_perm (w1[0], w0[3], selector); + w3[0] = __byte_perm (w0[3], w0[2], selector); + w2[3] = __byte_perm (w0[2], w0[1], selector); + w2[2] = __byte_perm (w0[1], w0[0], selector); + w2[1] = __byte_perm (w0[0], 0, selector); + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 10: + w3[1] = __byte_perm (w0[3], w0[2], selector); + w3[0] = __byte_perm (w0[2], w0[1], selector); + w2[3] = __byte_perm (w0[1], w0[0], selector); + w2[2] = __byte_perm (w0[0], 0, selector); + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 11: + w3[1] = __byte_perm (w0[2], w0[1], selector); + w3[0] = __byte_perm 
(w0[1], w0[0], selector); + w2[3] = __byte_perm (w0[0], 0, selector); + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 12: + w3[1] = __byte_perm (w0[1], w0[0], selector); + w3[0] = __byte_perm (w0[0], 0, selector); + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + + case 13: + w3[1] = __byte_perm (w0[0], 0, selector); + w3[0] = 0; + w2[3] = 0; + w2[2] = 0; + w2[1] = 0; + w2[0] = 0; + w1[3] = 0; + w1[2] = 0; + w1[1] = 0; + w1[0] = 0; + w0[3] = 0; + w0[2] = 0; + w0[1] = 0; + w0[0] = 0; + break; + } + #endif +} diff --git a/amd/gpu_aes256_amd.c b/OpenCL/gpu_aes256_amd.c similarity index 100% rename from amd/gpu_aes256_amd.c rename to OpenCL/gpu_aes256_amd.c diff --git a/amd/gpu_serpent256_amd.c b/OpenCL/gpu_serpent256_amd.c similarity index 100% rename from amd/gpu_serpent256_amd.c rename to OpenCL/gpu_serpent256_amd.c diff --git a/amd/gpu_twofish256_amd.c b/OpenCL/gpu_twofish256_amd.c similarity index 100% rename from amd/gpu_twofish256_amd.c rename to OpenCL/gpu_twofish256_amd.c diff --git a/amd/m00000_a0.cl b/OpenCL/m00000_a0.cl similarity index 92% rename from amd/m00000_a0.cl rename to OpenCL/m00000_a0.cl index e6b29a3..e9f3080 100644 --- a/amd/m00000_a0.cl +++ b/OpenCL/m00000_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] 
= pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -114,16 +90,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - append_0x80_2 (w0, w1, out_len); + append_0x80_2x4 (w0, w1, out_len); w3[2] = out_len * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -194,12 +170,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -227,14 +203,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -261,28 +237,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -291,16 
+267,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - append_0x80_2 (w0, w1, out_len); + append_0x80_2x4 (w0, w1, out_len); w3[2] = out_len * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -375,13 +351,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00000_a1.cl b/OpenCL/m00000_a1.cl similarity index 92% rename from amd/m00000_a1.cl rename to OpenCL/m00000_a1.cl index 5332887..9ed68a2 100644 --- a/amd/m00000_a1.cl +++ b/OpenCL/m00000_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif 
+#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -92,7 +68,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) { - append_0x80_2 (wordl0, wordl1, pw_l_len); + append_0x80_2x4 (wordl0, wordl1, pw_l_len); switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); } @@ -140,40 +116,40 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m00000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -243,13 +219,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -277,28 +252,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -309,7 +284,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) { - append_0x80_2 (wordl0, wordl1, pw_l_len); + append_0x80_2x4 (wordl0, wordl1, pw_l_len); switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, combs_buf[0].pw_len); } @@ -369,40 +344,40 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -477,13 +452,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00000_a3.cl b/OpenCL/m00000_a3.cl similarity index 73% rename from amd/m00000_a3.cl rename to OpenCL/m00000_a3.cl index 20c32ce..f7d2ee7 100644 --- a/amd/m00000_a3.cl +++ b/OpenCL/m00000_a3.cl @@ -4,46 +4,21 @@ */ #define _MD5_ -#define 
_SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ { \ @@ -62,7 +37,7 @@ a -= t; \ } -static void m00000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global 
u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -147,22 +122,20 @@ static void m00000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -233,16 +206,16 @@ static void m00000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global 
void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -361,7 +334,7 @@ static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - const u32x pre_cd = c_rev ^ d_rev; + const u32 pre_cd = c_rev ^ d_rev; MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); MD5_STEP_REV1(MD5_H, c_rev, d_rev, 
a_rev, b_rev, w[15], MD5C2e, MD5S22); @@ -370,27 +343,25 @@ static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); + const u32 pre_d = d_rev; + const u32 pre_a = a_rev - w0; + const u32 pre_b = b_rev - (pre_a ^ pre_cd); + const u32 pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -471,16 +442,16 @@ static void m00000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -518,7 +489,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m04 (__glo m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -556,7 +527,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m08 (__glo m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -594,7 +565,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_m16 (__glo m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -632,7 +603,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s04 (__glo m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
+__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -670,7 +641,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s08 (__glo m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t 
*digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00010_a0.cl b/OpenCL/m00010_a0.cl similarity index 93% rename from amd/m00010_a0.cl rename to OpenCL/m00010_a0.cl index 9d2fc7c..6d8262f 100644 --- a/amd/m00010_a0.cl +++ b/OpenCL/m00010_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" 
-#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 
w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -196,12 +172,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -272,12 +248,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -305,14 +281,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -359,28 +335,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -451,12 +427,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -532,12 +508,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00010_a1.cl b/OpenCL/m00010_a1.cl similarity index 94% rename from amd/m00010_a1.cl rename to OpenCL/m00010_a1.cl index ea4bde9..956dc54 100644 --- a/amd/m00010_a1.cl +++ b/OpenCL/m00010_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, 
__global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -194,28 +170,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0] | s1[0]; w1[1] = wordl1[1] | wordr1[1] | s1[1]; w1[2] = wordl1[2] | wordr1[2] | s1[2]; w1[3] = wordl1[3] | wordr1[3] | s1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0] | s2[0]; w2[1] = wordl2[1] | wordr2[1] | s2[1]; w2[2] = wordl2[2] | 
wordr2[2] | s2[2]; w2[3] = wordl2[3] | wordr2[3] | s2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0] | s3[0]; w3[1] = wordl3[1] | wordr3[1] | s3[1]; @@ -228,12 +204,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -304,12 +280,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -337,28 +313,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -483,28 +459,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0] | s1[0]; w1[1] = wordl1[1] | wordr1[1] | s1[1]; w1[2] = wordl1[2] | wordr1[2] | s1[2]; w1[3] = 
wordl1[3] | wordr1[3] | s1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0] | s2[0]; w2[1] = wordl2[1] | wordr2[1] | s2[1]; w2[2] = wordl2[2] | wordr2[2] | s2[2]; w2[3] = wordl2[3] | wordr2[3] | s2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0] | s3[0]; w3[1] = wordl3[1] | wordr3[1] | s3[1]; @@ -517,12 +493,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -598,12 +574,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00010_a3.cl b/OpenCL/m00010_a3.cl similarity index 74% rename from amd/m00010_a3.cl rename to OpenCL/m00010_a3.cl index 3c8cae9..8547767 100644 --- a/amd/m00010_a3.cl +++ b/OpenCL/m00010_a3.cl @@ -4,46 +4,21 @@ */ #define _MD5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M 
"check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ { \ @@ -62,7 +37,7 @@ a -= t; \ } -static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -204,22 +179,20 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -290,16 +263,16 @@ static void m00010m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset) +static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -418,7 +391,7 @@ static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - const u32x pre_cd = c_rev ^ d_rev; + const u32 pre_cd = c_rev ^ d_rev; MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); @@ -427,27 +400,25 @@ static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); + const 
u32 pre_d = d_rev; + const u32 pre_a = a_rev - w0; + const u32 pre_b = b_rev - (pre_a ^ pre_cd); + const u32 pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -528,16 +499,16 @@ static void m00010s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 
*bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -575,7 +546,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m04 (__glo m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -613,7 +584,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m08 (__glo m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -651,7 +622,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_m16 (__glo m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -689,7 +660,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s04 (__glo m00010s (w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -727,7 +698,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s08 (__glo m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00010_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, 
__global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00020_a0.cl b/OpenCL/m00020_a0.cl similarity index 93% rename from amd/m00020_a0.cl rename to OpenCL/m00020_a0.cl index c0816a0..6dfd6c9 100644 --- a/amd/m00020_a0.cl +++ b/OpenCL/m00020_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,10 +80,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -134,10 +110,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -175,12 +151,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c 
= MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -251,12 +227,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -284,14 +260,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -338,10 +314,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -368,10 +344,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -409,12 +385,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -490,12 +466,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 
(__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00020_a1.cl b/OpenCL/m00020_a1.cl similarity index 93% rename from amd/m00020_a1.cl rename to OpenCL/m00020_a1.cl index 6f433c8..672737f 100644 --- a/amd/m00020_a1.cl +++ b/OpenCL/m00020_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global 
void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,10 +128,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -180,10 +156,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -221,12 +197,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -297,12 +273,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -330,28 +306,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -434,10 +410,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -462,10 +438,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -505,12 +481,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -586,12 +562,12 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00020_a3.cl b/OpenCL/m00020_a3.cl similarity index 87% rename from amd/m00020_a3.cl rename to OpenCL/m00020_a3.cl index 8d1e4b7..c40e0ad 100644 --- a/amd/m00020_a3.cl +++ b/OpenCL/m00020_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t 
*digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00020m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -93,7 +69,7 @@ static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,10 +81,10 @@ static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -152,12 +128,12 @@ static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = 
MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -228,16 +204,16 @@ static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00020s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00020s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -298,7 +274,7 @@ static void m00020s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -310,10 +286,10 @@ static void m00020s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -357,12 +333,12 @@ static void m00020s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -438,12 +414,12 @@ static void m00020s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -457,28 +433,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; 
w3[1] = 0; @@ -504,28 +480,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -551,28 +527,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -598,28 +574,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -645,28 +621,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 
w3[4]; w3[0] = 0; w3[1] = 0; @@ -692,28 +668,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00020_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00030_a0.cl b/OpenCL/m00030_a0.cl similarity index 93% rename from amd/m00030_a0.cl rename to OpenCL/m00030_a0.cl index 6a593be..ff2e7ac 100644 --- a/amd/m00030_a0.cl +++ b/OpenCL/m00030_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -64,14 +40,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; 
pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -106,28 +82,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -172,10 +148,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -205,10 +181,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -279,12 +255,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -312,14 +288,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = 
pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -366,28 +342,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -432,10 +408,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -465,10 +441,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -544,12 +520,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00030_a1.cl b/OpenCL/m00030_a1.cl similarity index 93% rename from amd/m00030_a1.cl rename to OpenCL/m00030_a1.cl index 4abf699..d156010 100644 --- a/amd/m00030_a1.cl +++ b/OpenCL/m00030_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include 
"include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -62,28 +38,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -196,10 +172,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -218,10 +194,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 
w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -251,10 +227,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -325,12 +301,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -358,28 +334,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -504,10 +480,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -526,10 +502,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; 
make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -559,10 +535,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -638,12 +614,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00030_a3.cl b/OpenCL/m00030_a3.cl similarity index 74% rename from amd/m00030_a3.cl rename to OpenCL/m00030_a3.cl index b253d0e..dcc37ac 100644 --- a/amd/m00030_a3.cl +++ b/OpenCL/m00030_a3.cl @@ -4,46 +4,21 @@ */ #define _MD5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define 
MD5_STEP_REV(f,a,b,c,d,x,t,s) \ { \ @@ -62,7 +37,7 @@ a -= t; \ } -static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -204,22 +179,20 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + 
u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -290,16 +263,16 @@ static void m00030m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, 
__global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -418,7 +391,7 @@ static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - const u32x pre_cd = c_rev ^ d_rev; + const u32 pre_cd = c_rev ^ d_rev; MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); @@ -427,27 +400,25 @@ static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); + const u32 pre_d = d_rev; + const u32 pre_a = a_rev - w0; + const u32 pre_b = b_rev - (pre_a ^ pre_cd); + const u32 pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + 
u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -528,16 +499,16 @@ static void m00030s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global 
u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -575,7 +546,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m04 (__glo m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, 
__global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -613,7 +584,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m08 (__glo m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const 
u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -651,7 +622,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_m16 (__glo m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global 
u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -689,7 +660,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s04 (__glo m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -727,7 +698,7 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s08 (__glo m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00030_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, 
__global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00040_a0.cl b/OpenCL/m00040_a0.cl similarity index 93% rename from amd/m00040_a0.cl rename to OpenCL/m00040_a0.cl index 3fff8d6..7eb35bb 100644 --- a/amd/m00040_a0.cl +++ b/OpenCL/m00040_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, 
__global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -140,10 +116,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -167,10 +143,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, 
MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -241,12 +217,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -274,14 +250,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -328,28 +304,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -364,10 +340,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -391,10 +367,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, 
w0_t[1], MD5C01, MD5S01); @@ -470,12 +446,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00040_a1.cl b/OpenCL/m00040_a1.cl similarity index 93% rename from amd/m00040_a1.cl rename to OpenCL/m00040_a1.cl index 3679517..2b2e1fb 100644 --- a/amd/m00040_a1.cl +++ b/OpenCL/m00040_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -62,28 +38,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] 
= 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -166,10 +142,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -188,10 +164,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -215,10 +191,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -289,12 +265,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -322,28 +298,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; 
wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -438,10 +414,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -460,10 +436,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -487,10 +463,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -566,12 +542,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00040_a3.cl b/OpenCL/m00040_a3.cl similarity index 87% rename from amd/m00040_a3.cl rename to OpenCL/m00040_a3.cl index 54d6cde..31cd660 100644 --- a/amd/m00040_a3.cl +++ b/OpenCL/m00040_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" 
-#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00040m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -93,7 +69,7 @@ static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,10 +81,10 @@ static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -152,10 +128,10 @@ static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -226,16 +202,16 @@ static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, 
__global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00040s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -296,7 +272,7 @@ static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -308,10 +284,10 @@ static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -355,10 +331,10 @@ static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - 
u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -434,12 +410,12 @@ static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -453,28 +429,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -500,28 +476,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -547,28 +523,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 
8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -594,28 +570,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -641,28 +617,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -688,28 +664,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00040_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00050_a0.cl b/OpenCL/m00050_a0.cl similarity index 89% rename from amd/m00050_a0.cl rename to OpenCL/m00050_a0.cl index 4047234..efda0bd 100644 --- a/amd/m00050_a0.cl +++ b/OpenCL/m00050_a0.cl @@ -8,64 +8,45 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 
-#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -141,7 +122,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], 
const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -192,7 +173,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -242,14 +223,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -298,28 +279,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -332,36 +313,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 
w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -382,16 +363,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -419,14 +400,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -487,28 +468,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -521,36 +502,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 
0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -571,16 +552,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00050_a1.cl b/OpenCL/m00050_a1.cl similarity index 90% rename from amd/m00050_a1.cl rename to OpenCL/m00050_a1.cl index 66506ae..80602e1 100644 --- a/amd/m00050_a1.cl +++ b/OpenCL/m00050_a1.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - 
u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -139,7 +120,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -190,7 +171,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -240,28 +221,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = 
pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -352,28 +333,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -384,36 +365,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -434,16 +415,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = 
digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -471,28 +452,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -595,28 +576,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -627,36 +608,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -677,16 +658,16 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00050_a3.cl b/OpenCL/m00050_a3.cl similarity index 84% rename from amd/m00050_a3.cl rename to OpenCL/m00050_a3.cl index 9701b6a..292f636 100644 --- a/amd/m00050_a3.cl +++ b/OpenCL/m00050_a3.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x 
wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -139,7 +120,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -190,7 +171,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -224,7 +205,7 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void m00050m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00050m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -271,7 +252,7 @@ static void m00050m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -283,36 +264,36 @@ static void m00050m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] 
= w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -333,20 +314,20 @@ static void m00050m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00050s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00050s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -405,7 +386,7 @@ static void m00050s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -417,36 +398,36 @@ static void m00050s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -467,16 +448,16 @@ static void m00050s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + salt_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -490,28 +471,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m04 (__glo if (gid >= gid_max) 
return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -537,28 +518,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -584,28 +565,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -631,28 +612,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -678,28 +659,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; 
w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -725,28 +706,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00050_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00060_a0.cl b/OpenCL/m00060_a0.cl similarity index 89% rename from amd/m00060_a0.cl rename to OpenCL/m00060_a0.cl index aa9a34a..0bfd81d 100644 --- a/amd/m00060_a0.cl +++ b/OpenCL/m00060_a0.cl @@ -8,64 +8,45 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x 
w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -141,7 +122,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -192,7 +173,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -242,14 +223,14 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -280,36 +261,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -319,28 +300,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -368,16 +349,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo w3_t[2] = (64 + out_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } 
@@ -405,14 +386,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -443,36 +424,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -494,28 +475,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -543,16 +524,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo w3_t[2] = (64 + out_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include 
VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00060_a1.cl b/OpenCL/m00060_a1.cl similarity index 90% rename from amd/m00060_a1.cl rename to OpenCL/m00060_a1.cl index 5a1548e..4fb02e9 100644 --- a/amd/m00060_a1.cl +++ b/OpenCL/m00060_a1.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + 
u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -139,7 +120,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -190,7 +171,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -240,28 +221,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -297,36 +278,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; 
w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -373,28 +354,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -420,16 +401,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo w3_t[2] = (64 + pw_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -457,28 +438,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 
0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -514,36 +495,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -602,28 +583,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -649,16 +630,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo w3_t[2] = (64 + pw_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00060_a3.cl b/OpenCL/m00060_a3.cl 
similarity index 83% rename from amd/m00060_a3.cl rename to OpenCL/m00060_a3.cl index 0d832d8..1f9b124 100644 --- a/amd/m00060_a3.cl +++ b/OpenCL/m00060_a3.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = 
w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -139,7 +120,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -190,7 +171,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -224,7 +205,7 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void m00060m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00060m (u32 w0[4], u32 w1[4], 
u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -255,36 +236,36 @@ static void m00060m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -292,7 +273,7 @@ static void m00060m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -319,20 +300,20 @@ static void m00060m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + pw_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 =
digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00060s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00060s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset) { /** * modifier @@ -363,36 +344,36 @@ static void m00060s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -412,7 +393,7 @@ static void m00060s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -439,16 +420,16 @@ static void m00060s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + pw_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -462,28 +443,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -509,28 +490,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 
w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -556,28 +537,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -603,28 +584,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -650,28 +631,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -697,28 +678,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00060_s16 (__glo if (gid >= gid_max) return; - 
u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00100_a0.cl b/OpenCL/m00100_a0.cl similarity index 89% rename from amd/m00100_a0.cl rename to OpenCL/m00100_a0.cl index 1bdbecf..568564f 100644 --- a/amd/m00100_a0.cl +++ b/OpenCL/m00100_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -120,28 +96,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround 
(w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -240,12 +216,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -273,14 +249,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -313,28 +289,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -349,28 +325,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -472,12 +448,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const 
u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00100_a1.cl b/OpenCL/m00100_a1.cl similarity index 90% rename from amd/m00100_a1.cl rename to OpenCL/m00100_a1.cl index c2c8b60..160e41b 100644 --- a/amd/m00100_a1.cl +++ b/OpenCL/m00100_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -142,28 +118,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -174,28 +150,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround 
(w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -294,12 +270,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -327,28 +303,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -427,28 +403,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -459,28 +435,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = 
swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -582,12 +558,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00100_a3.cl b/OpenCL/m00100_a3.cl similarity index 73% rename from amd/m00100_a3.cl rename to OpenCL/m00100_a3.cl index 53e8e61..544e322 100644 --- a/amd/m00100_a3.cl +++ b/OpenCL/m00100_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA1_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -139,47 +114,45 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const 
u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + 
u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -279,8 +252,8 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -288,16 +261,16 @@ static void m00100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -409,47 +382,45 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 
^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -556,8 +527,8 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -565,16 +536,16 @@ static void m00100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, 
e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const 
u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -612,7 +583,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m04 (__glo m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -650,7 +621,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m08 (__glo m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -688,7 +659,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_m16 (__glo m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -726,7 +697,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s04 (__glo m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -764,7 +735,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s08 (__glo m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00110_a0.cl b/OpenCL/m00110_a0.cl similarity index 90% rename from amd/m00110_a0.cl rename to OpenCL/m00110_a0.cl index b5f4801..1263ab2 100644 --- a/amd/m00110_a0.cl +++ b/OpenCL/m00110_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const 
u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -196,28 +172,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 
w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -316,12 +292,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -349,14 +325,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -409,28 +385,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -501,28 +477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 
(__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -624,12 +600,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00110_a1.cl b/OpenCL/m00110_a1.cl similarity index 91% rename from amd/m00110_a1.cl rename to OpenCL/m00110_a1.cl index 43351a2..5bc9a16 100644 --- 
a/amd/m00110_a1.cl +++ b/OpenCL/m00110_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo if (gid >= gid_max) 
return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -194,28 +170,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0] | s1[0]; w1[1] = wordl1[1] | wordr1[1] | s1[1]; w1[2] = wordl1[2] | wordr1[2] | s1[2]; w1[3] = wordl1[3] | wordr1[3] | s1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0] | s2[0]; w2[1] = wordl2[1] | wordr2[1] | s2[1]; w2[2] = wordl2[2] | wordr2[2] | s2[2]; w2[3] = wordl2[3] | wordr2[3] | s2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0] | s3[0]; w3[1] = wordl3[1] | wordr3[1] | s3[1]; @@ -228,28 +204,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b 
= SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -348,12 +324,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -381,28 +357,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -533,28 +509,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | 
wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0] | s1[0]; w1[1] = wordl1[1] | wordr1[1] | s1[1]; w1[2] = wordl1[2] | wordr1[2] | s1[2]; w1[3] = wordl1[3] | wordr1[3] | s1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0] | s2[0]; w2[1] = wordl2[1] | wordr2[1] | s2[1]; w2[2] = wordl2[2] | wordr2[2] | s2[2]; w2[3] = wordl2[3] | wordr2[3] | s2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0] | s3[0]; w3[1] = wordl3[1] | wordr3[1] | s3[1]; @@ -567,28 +543,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + 
u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -690,12 +666,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00110_a3.cl b/OpenCL/m00110_a3.cl similarity index 74% rename from amd/m00110_a3.cl rename to OpenCL/m00110_a3.cl index d99c7fb..e62def9 100644 --- a/amd/m00110_a3.cl +++ b/OpenCL/m00110_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA1_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -200,49 +175,47 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 
(w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + 
const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -342,16 +315,16 @@ static void m00110m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, 
__global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -463,47 +436,45 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = 
rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -610,8 +581,8 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -619,16 +590,16 @@ static void m00110s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, 
__global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -666,7 +637,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m04 (__glo m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, 
digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -704,7 +675,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m08 (__glo m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 
*bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -742,7 +713,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_m16 (__glo m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m00110_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -780,7 +751,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s04 (__glo m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -818,7 +789,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s08 (__glo m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void 
*tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00110_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00120_a0.cl b/OpenCL/m00120_a0.cl similarity index 95% rename from amd/m00120_a0.cl rename to OpenCL/m00120_a0.cl index 29e6c8c..fe682eb 100644 --- a/amd/m00120_a0.cl +++ b/OpenCL/m00120_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 
-#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 
1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,10 +80,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -134,10 +110,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -192,11 +168,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -295,12 +271,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -328,14 +304,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -388,10 +364,10 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -418,10 +394,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -476,11 +452,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -582,12 +558,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00120_a1.cl b/OpenCL/m00120_a1.cl similarity index 95% rename from amd/m00120_a1.cl rename to OpenCL/m00120_a1.cl index 7080a53..c73c2e0 100644 --- a/amd/m00120_a1.cl +++ b/OpenCL/m00120_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include 
"common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x 
wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,10 +128,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -180,10 +156,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -238,11 +214,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -341,12 +317,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -374,28 +350,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; 
wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -484,10 +460,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -512,10 +488,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -570,11 +546,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -676,12 +652,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00120_a3.cl b/OpenCL/m00120_a3.cl similarity index 91% rename from amd/m00120_a3.cl rename to OpenCL/m00120_a3.cl index 444f062..593a98b 100644 --- a/amd/m00120_a3.cl +++ b/OpenCL/m00120_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 
-#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) +static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len) { switch (salt_len) { @@ -139,7 +115,7 @@ static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) } } -static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00120m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -188,10 +164,10 @@ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -250,15 +226,15 @@ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { const u32 w0r = bfs_buf[il_pos].i; - const u32x w0n = w0l | w0r; + const u32 w0n = w0l | w0r; - u32x wx[16]; + u32 wx[16]; wx[ 0] = w0_t[0]; wx[ 1] = w0_t[1]; @@ -279,10 +255,10 @@ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p overwrite_at (wx, w0n, salt_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wx[ 0]; w0_t[1] = wx[ 1]; @@ -305,11 +281,11 @@ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; 
+ u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -407,16 +383,16 @@ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00120s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global 
u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -483,10 +459,10 @@ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -545,15 +521,15 @@ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { const u32 w0r = bfs_buf[il_pos].i; - const u32x w0n = w0l | w0r; + const u32 w0n = w0l | w0r; - u32x wx[16]; + u32 wx[16]; wx[ 0] = w0_t[0]; wx[ 1] = w0_t[1]; @@ -574,10 +550,10 @@ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p overwrite_at (wx, w0n, salt_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wx[ 0]; w0_t[1] = wx[ 1]; @@ -600,11 +576,11 @@ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -706,12 +682,12 @@ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x 
r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -725,28 +701,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -772,28 +748,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -819,28 +795,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -866,28 +842,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 
0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -913,28 +889,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -960,28 +936,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00120_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00130_a0.cl b/OpenCL/m00130_a0.cl similarity index 95% rename from amd/m00130_a0.cl rename to OpenCL/m00130_a0.cl index f4e4c4b..491f451 100644 --- a/amd/m00130_a0.cl +++ b/OpenCL/m00130_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -64,14 +40,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -106,28 +82,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -172,10 +148,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -222,11 +198,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = 
SHA1M_E; #undef K #define K SHA1C00 @@ -325,12 +301,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -358,14 +334,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -418,28 +394,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -484,10 +460,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -534,11 +510,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c 
= SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -640,12 +616,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00130_a1.cl b/OpenCL/m00130_a1.cl similarity index 95% rename from amd/m00130_a1.cl rename to OpenCL/m00130_a1.cl index 6d998d2..6b556a8 100644 --- a/amd/m00130_a1.cl +++ b/OpenCL/m00130_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -62,28 +38,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 
5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -196,10 +172,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -218,10 +194,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -268,11 +244,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -371,12 +347,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -404,28 +380,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; 
wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -556,10 +532,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -578,10 +554,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -628,11 +604,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -734,12 +710,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00130_a3.cl b/OpenCL/m00130_a3.cl similarity index 74% rename from amd/m00130_a3.cl rename to OpenCL/m00130_a3.cl index d63424c..9847b7e 100644 --- a/amd/m00130_a3.cl +++ b/OpenCL/m00130_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA1_ -#define _SCALAR_ 
#include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -200,49 +175,47 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const 
u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -342,16 +315,16 @@ static void m00130m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -463,47 +436,45 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 
2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 
^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -610,8 +581,8 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -619,16 +590,16 @@ static void m00130s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -666,7 +637,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m04 (__glo m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -704,7 +675,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m08 (__glo m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -742,7 +713,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_m16 (__glo m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -780,7 +751,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s04 (__glo m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -818,7 +789,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s08 (__glo m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
+__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00130_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00140_a0.cl b/OpenCL/m00140_a0.cl similarity index 95% rename from amd/m00140_a0.cl rename to OpenCL/m00140_a0.cl index fe1a38d..466b7cd 100644 --- a/amd/m00140_a0.cl +++ b/OpenCL/m00140_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -64,14 +40,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -106,28 +82,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -142,10 +118,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -186,11 +162,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -289,12 +265,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - 
const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -322,14 +298,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -382,28 +358,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -418,10 +394,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -462,11 +438,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -568,12 +544,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, 
e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00140_a1.cl b/OpenCL/m00140_a1.cl similarity index 95% rename from amd/m00140_a1.cl rename to OpenCL/m00140_a1.cl index 68518fa..6512ef8 100644 --- a/amd/m00140_a1.cl +++ b/OpenCL/m00140_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" // no unicode yet @@ -62,28 +38,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -166,10 +142,10 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -188,10 +164,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -232,11 +208,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -335,12 +311,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -368,28 +344,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; 
wordl3[1] = 0; @@ -490,10 +466,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -512,10 +488,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -556,11 +532,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -662,12 +638,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00140_a3.cl b/OpenCL/m00140_a3.cl similarity index 91% rename from amd/m00140_a3.cl rename to OpenCL/m00140_a3.cl index bd32487..48489ef 100644 --- a/amd/m00140_a3.cl +++ b/OpenCL/m00140_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include 
"include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) +static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len) { switch (salt_len) { @@ -139,7 +115,7 @@ static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) } } -static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00140m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -188,10 +164,10 @@ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -250,15 +226,15 @@ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { const u32 w0r = bfs_buf[il_pos].i; - const u32x w0n = w0l | w0r; + const u32 w0n = w0l | w0r; - u32x wx[16]; + u32 wx[16]; wx[ 0] = w0_t[0]; wx[ 1] = w0_t[1]; @@ -279,10 +255,10 @@ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p overwrite_at (wx, w0n, salt_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wx[ 0]; w0_t[1] = wx[ 1]; @@ -305,11 +281,11 @@ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -407,16 
+383,16 @@ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00140s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, 
__global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -483,10 +459,10 @@ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -545,15 +521,15 @@ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { const u32 w0r = bfs_buf[il_pos].i; - const u32x w0n = w0l | w0r; + const u32 w0n = w0l | w0r; - u32x wx[16]; + u32 wx[16]; wx[ 0] = w0_t[0]; wx[ 1] = w0_t[1]; @@ -574,10 +550,10 @@ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p overwrite_at (wx, w0n, salt_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wx[ 0]; w0_t[1] = wx[ 1]; @@ -600,11 +576,11 @@ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -706,12 +682,12 @@ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + 
const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -725,28 +701,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -772,28 +748,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -819,28 +795,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -866,28 +842,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + 
u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -913,28 +889,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -960,28 +936,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00140_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00150_a0.cl b/OpenCL/m00150_a0.cl similarity index 90% rename from amd/m00150_a0.cl rename to OpenCL/m00150_a0.cl index adf5cc7..2981602 100644 --- a/amd/m00150_a0.cl +++ b/OpenCL/m00150_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -174,7 +150,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -227,7 +203,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], 
u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -279,14 +255,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -321,28 +297,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -355,36 +331,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -405,16 +381,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 
digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -442,14 +418,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -496,28 +472,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -530,36 +506,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -580,16 +556,16 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00150_a1.cl b/OpenCL/m00150_a1.cl similarity index 91% rename from amd/m00150_a1.cl rename to OpenCL/m00150_a1.cl index a5dfc94..3cf3afe 100644 --- a/amd/m00150_a1.cl +++ b/OpenCL/m00150_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = 
w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -277,28 +253,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; 
wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -375,28 +351,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -407,36 +383,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -457,16 +433,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const 
u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -494,28 +470,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -604,28 +580,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -636,36 +612,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; 
w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -686,16 +662,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00150_a3.cl b/OpenCL/m00150_a3.cl similarity index 85% rename from amd/m00150_a3.cl rename to OpenCL/m00150_a3.cl index ef3d594..18da15a 100644 --- a/amd/m00150_a3.cl +++ b/OpenCL/m00150_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 
digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -261,7 +237,7 @@ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, digest); } -static void m00150m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, 
__global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00150m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -294,7 +270,7 @@ static void m00150m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -306,36 +282,36 @@ static void m00150m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 
w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -356,20 +332,20 @@ static void m00150m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00150s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00150s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, 
__global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -414,7 +390,7 @@ static void m00150s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -426,36 +402,36 @@ static void m00150s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -476,16 +452,16 @@ static void m00150s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = 
digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -499,28 +475,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -546,28 +522,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -593,28 +569,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -640,28 +616,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x 
w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -687,28 +663,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -734,28 +710,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00150_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00160_a0.cl b/OpenCL/m00160_a0.cl similarity index 90% rename from amd/m00160_a0.cl rename to OpenCL/m00160_a0.cl index bdb8887..0707e96 100644 --- a/amd/m00160_a0.cl +++ b/OpenCL/m00160_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif 
- -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -174,7 +150,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -227,7 +203,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, 
w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -279,14 +255,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -317,36 +293,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -356,28 +332,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -405,16 +381,16 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + out_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -442,14 +418,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -480,36 +456,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -531,28 +507,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = 
pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -580,16 +556,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + out_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00160_a1.cl b/OpenCL/m00160_a1.cl similarity index 91% rename from amd/m00160_a1.cl rename to OpenCL/m00160_a1.cl index dc27cdf..f6cee14 100644 --- a/amd/m00160_a1.cl +++ b/OpenCL/m00160_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], 
u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -277,28 +253,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - 
u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -334,36 +310,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -410,28 +386,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -457,16 +433,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[5]; + u32 digest[5]; 
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -494,28 +470,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -551,36 +527,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -639,28 +615,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | 
wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -686,16 +662,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00160_a3.cl b/OpenCL/m00160_a3.cl similarity index 85% rename from amd/m00160_a3.cl rename to OpenCL/m00160_a3.cl index 3dfd322..3fcb174 100644 --- a/amd/m00160_a3.cl +++ b/OpenCL/m00160_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define 
COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = 
ipad[1]; @@ -261,7 +237,7 @@ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, digest); } -static void m00160m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00160m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -292,36 +268,36 @@ static void m00160m (u32x 
w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -329,7 +305,7 @@ static void m00160m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -354,20 +330,20 @@ static void m00160m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00160s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00160s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -398,36 +374,36 @@ static void m00160s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + 
u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -447,7 +423,7 @@ static void m00160s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -472,16 +448,16 @@ static void m00160s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -495,28 +471,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -542,28 +518,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -589,28 +565,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -636,28 +612,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -683,28 +659,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -730,28 +706,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00160_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m00190_a0.cl b/OpenCL/m00190_a0.cl similarity index 89% rename from amd/m00190_a0.cl rename to OpenCL/m00190_a0.cl index aeb98d6..895c9de 100644 --- 
a/amd/m00190_a0.cl +++ b/OpenCL/m00190_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 4 #define DGST_R2 3 #define DGST_R3 2 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -120,28 +96,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = 
swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -245,23 +221,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -290,14 +266,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -324,28 +300,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -360,28 +336,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = 
swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -485,23 +461,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } } } diff --git a/amd/m00190_a1.cl b/OpenCL/m00190_a1.cl similarity index 89% rename from amd/m00190_a1.cl rename to OpenCL/m00190_a1.cl 
index 68bae1b..e55709d 100644 --- a/amd/m00190_a1.cl +++ b/OpenCL/m00190_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 4 #define DGST_R2 3 #define DGST_R3 2 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m00190_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -142,28 +118,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -174,28 +150,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; 
- u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -299,23 +275,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -344,28 +320,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -438,28 +414,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo switch_buffer_by_offset 
(wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -470,28 +446,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = 
SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -595,23 +571,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } } } diff --git a/amd/m00190_a3.cl b/OpenCL/m00190_a3.cl similarity index 72% rename from amd/m00190_a3.cl rename to OpenCL/m00190_a3.cl index f76d23d..fe0452c 100644 --- a/amd/m00190_a3.cl +++ b/OpenCL/m00190_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA1_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 4 #define DGST_R2 3 #define DGST_R3 2 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * 
words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -139,47 +114,45 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 
= rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ 
w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -279,8 +252,8 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -293,28 +266,28 @@ static void m00190m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_M + #include COMPARE_M } } } -static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -420,47 +393,45 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x 
w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -561,8 +532,8 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); 
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -575,28 +546,28 @@ static void m00190s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += SHA1M_C; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } a &= 0x00000fff; { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; + const u32 r0 = a; + const u32 r1 = e; + const u32 r2 = d; + const u32 r3 = c; - #include VECT_COMPARE_S + #include COMPARE_S } } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -634,7 +605,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m04 (__glo m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -672,7 +643,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m08 (__glo m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t 
*plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -710,7 +681,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_m16 (__glo m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -748,7 +719,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s04 (__glo m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -786,7 +757,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s08 (__glo m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00190_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00200_a0.cl b/OpenCL/m00200_a0.cl similarity index 88% rename from amd/m00200_a0.cl rename to OpenCL/m00200_a0.cl index d6fe4b5..e04fce0 100644 --- a/amd/m00200_a0.cl +++ b/OpenCL/m00200_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -114,7 +90,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0[0]; w_t[ 1] = w0[1]; @@ -133,10 +109,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo w_t[14] = 0; w_t[15] = 0; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b 
= MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -150,7 +126,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1) { - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; ROUND ((wj >> 0) & 0xff); ROUND ((wj >> 8) & 0xff); @@ -158,7 +134,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo ROUND ((wj >> 24) & 0xff); } - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; const u32 left = out_len - i; @@ -181,12 +157,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -214,14 +190,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -248,28 +224,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -278,7 +254,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0[0]; w_t[ 1] = 
w0[1]; @@ -297,10 +273,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo w_t[14] = 0; w_t[15] = 0; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b = MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -314,7 +290,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1) { - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; ROUND ((wj >> 0) & 0xff); ROUND ((wj >> 8) & 0xff); @@ -322,7 +298,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo ROUND ((wj >> 24) & 0xff); } - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; const u32 left = out_len - i; @@ -345,12 +321,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00200_a1.cl b/OpenCL/m00200_a1.cl similarity index 90% rename from amd/m00200_a1.cl rename to OpenCL/m00200_a1.cl index 2a088d5..3c6c672 100644 --- a/amd/m00200_a1.cl +++ b/OpenCL/m00200_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include 
"common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -138,7 +114,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = wordl0[0] | wordr0[0]; w_t[ 1] = wordl0[1] | 
wordr0[1]; @@ -157,10 +133,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo w_t[14] = wordl3[2] | wordr3[2]; w_t[15] = 0; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b = MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -174,7 +150,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) { - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; ROUND ((wj >> 0) & 0xff); ROUND ((wj >> 8) & 0xff); @@ -182,7 +158,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo ROUND ((wj >> 24) & 0xff); } - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; const u32 left = pw_len - i; @@ -205,12 +181,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -238,28 +214,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -328,7 +304,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = wordl0[0] | wordr0[0]; w_t[ 1] = wordl0[1] | wordr0[1]; @@ -347,10 
+323,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo w_t[14] = wordl3[2] | wordr3[2]; w_t[15] = 0; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b = MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -364,7 +340,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) { - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; ROUND ((wj >> 0) & 0xff); ROUND ((wj >> 8) & 0xff); @@ -372,7 +348,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo ROUND ((wj >> 24) & 0xff); } - const u32x wj = w_t[j]; + const u32 wj = w_t[j]; const u32 left = pw_len - i; @@ -395,12 +371,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00200_a3.cl b/OpenCL/m00200_a3.cl similarity index 57% rename from amd/m00200_a3.cl rename to OpenCL/m00200_a3.cl index 0f3ac89..6ead9d4 100644 --- a/amd/m00200_a3.cl +++ b/OpenCL/m00200_a3.cl @@ -4,48 +4,23 @@ */ #define _MYSQL323_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" 
-#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset) { /** * modifier @@ -58,20 +33,18 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b = MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -139,16 +112,16 @@ static void m00200m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, 
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -173,20 +146,18 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; + u32 a = MYSQL323_A; + u32 b = MYSQL323_B; - u32x add = 7; + u32 add = 7; #define ROUND(v) \ { \ @@ -254,16 +225,16 @@ static void m00200s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g a &= 0x7fffffff; b &= 0x7fffffff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global 
u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -301,7 +272,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m04 (__glo m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -339,7 +310,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m00200_m08 (__glo m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -377,7 +348,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_m16 (__glo m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -415,7 +386,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s04 (__glo m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -453,7 +424,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s08 (__glo m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00200_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00300_a0.cl b/OpenCL/m00300_a0.cl similarity index 93% rename from amd/m00300_a0.cl rename to OpenCL/m00300_a0.cl index 2a5c883..8e2db4a 100644 --- a/amd/m00300_a0.cl +++ b/OpenCL/m00300_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 
#define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; 
pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -120,28 +96,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef 
K #define K SHA1C00 @@ -365,12 +341,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -398,14 +374,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -438,28 +414,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -474,28 +450,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 
8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -722,12 +698,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00300_a1.cl b/OpenCL/m00300_a1.cl similarity index 93% rename from amd/m00300_a1.cl rename to OpenCL/m00300_a1.cl index 872b33a..57938be 100644 --- a/amd/m00300_a1.cl +++ b/OpenCL/m00300_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -142,28 +118,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo switch_buffer_by_offset (wordr0, 
wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -174,28 +150,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + 
u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -419,12 +395,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -452,28 +428,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -552,28 +528,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -584,28 +560,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = 
swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -832,12 +808,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00300_a3.cl b/OpenCL/m00300_a3.cl similarity index 80% rename from amd/m00300_a3.cl rename to OpenCL/m00300_a3.cl index b07105f..41a673a 100644 --- a/amd/m00300_a3.cl +++ b/OpenCL/m00300_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA1_ -#define _SCALAR_ #include 
"include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -159,49 +134,47 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const 
u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -306,22 +279,22 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g d += SHA1M_D; e += SHA1M_E; - u32x w0_t = a; - u32x w1_t = b; - u32x w2_t = c; - u32x w3_t = d; - u32x w4_t = e; - u32x w5_t = 0x80000000; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 20 * 8; + u32 w0_t = a; + u32 w1_t = b; + u32 w2_t = c; + u32 w3_t = d; + u32 w4_t = e; + u32 w5_t = 0x80000000; + u32 w6_t = 0; + u32 w7_t = 0; + u32 w8_t = 0; + u32 w9_t = 0; + 
u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 20 * 8; a = SHA1M_A; b = SHA1M_B; @@ -426,16 +399,16 @@ static void m00300m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, 
__global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -563,47 +536,45 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + const u32 w0s01 = rotl32 (w0, 1u); + const u32 w0s02 = rotl32 (w0, 2u); + const u32 w0s03 = rotl32 (w0, 3u); + const u32 w0s04 = rotl32 (w0, 4u); + const u32 w0s05 = rotl32 (w0, 5u); + const u32 
w0s06 = rotl32 (w0, 6u); + const u32 w0s07 = rotl32 (w0, 7u); + const u32 w0s08 = rotl32 (w0, 8u); + const u32 w0s09 = rotl32 (w0, 9u); + const u32 w0s10 = rotl32 (w0, 10u); + const u32 w0s11 = rotl32 (w0, 11u); + const u32 w0s12 = rotl32 (w0, 12u); + const u32 w0s13 = rotl32 (w0, 13u); + const u32 w0s14 = rotl32 (w0, 14u); + const u32 w0s15 = rotl32 (w0, 15u); + const u32 w0s16 = rotl32 (w0, 16u); + const u32 w0s17 = rotl32 (w0, 17u); + const u32 w0s18 = rotl32 (w0, 18u); + const u32 w0s19 = rotl32 (w0, 19u); + const u32 w0s20 = rotl32 (w0, 20u); + + const u32 w0s04___w0s06 = w0s04 ^ w0s06; + const u32 w0s04___w0s08 = w0s04 ^ w0s08; + const u32 w0s08___w0s12 = w0s08 ^ w0s12; + const u32 w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -703,8 +674,8 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); + const u32 w0s21 = rotl32 (w0, 21u); + const u32 w0s22 = rotl32 (w0, 22U); SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); @@ -717,22 +688,22 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g d += SHA1M_D; e += SHA1M_E; - u32x w0_t = a; - u32x w1_t = b; - u32x w2_t = c; - u32x w3_t = d; - u32x w4_t = e; - u32x w5_t = 0x80000000; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 20 * 8; + u32 w0_t = a; + u32 w1_t = b; + u32 w2_t = c; + u32 w3_t = d; + u32 w4_t = e; + u32 w5_t = 0x80000000; + u32 w6_t = 0; + u32 w7_t = 0; + u32 w8_t = 0; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 
0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 20 * 8; a = SHA1M_A; b = SHA1M_B; @@ -842,16 +813,16 @@ static void m00300s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 
*hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -889,7 +860,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m04 (__glo m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global 
u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -927,7 +898,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m08 (__glo m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -965,7 +936,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_m16 (__glo m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 
*bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1003,7 +974,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s04 (__glo m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1041,7 +1012,7 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s08 (__glo m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00300_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, 
__global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m00400.cl b/OpenCL/m00400.cl similarity index 86% rename from amd/m00400.cl rename to OpenCL/m00400.cl index 8de3b0f..46500bd 100644 --- a/amd/m00400.cl +++ b/OpenCL/m00400.cl @@ -8,60 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + 
u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = 0; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -147,21 +130,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; @@ -185,41 +168,41 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_init (__gl u32 block_len = 8 + pw_len; - u32x block0[4]; + u32 block0[4]; block0[0] = salt_buf[0]; block0[1] = salt_buf[1]; block0[2] = w0[0]; block0[3] = w0[1]; - u32x block1[4]; + u32 block1[4]; block1[0] = w0[2]; block1[1] = w0[3]; block1[2] = w1[0]; block1[3] = w1[1]; - u32x block2[4]; + u32 block2[4]; block2[0] = w1[2]; block2[1] = w1[3]; block2[2] = w2[0]; block2[3] = w2[1]; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; block3[2] = block_len * 8; block3[3] = 0; - append_0x80_4 (block0, block1, block2, block3, block_len); + append_0x80_4x4 (block0, block1, block2, block3, block_len); /** * init */ - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -244,21 +227,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = 
pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; @@ -271,7 +254,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_loop (__gl * digest */ - u32x digest[4]; + u32 digest[4]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -284,35 +267,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_loop (__gl u32 block_len = (16 + pw_len); - u32x block0[4]; + u32 block0[4]; block0[0] = 0; block0[1] = 0; block0[2] = 0; block0[3] = 0; - u32x block1[4]; + u32 block1[4]; block1[0] = w0[0]; block1[1] = w0[1]; block1[2] = w0[2]; block1[3] = w0[3]; - u32x block2[4]; + u32 block2[4]; block2[0] = w1[0]; block2[1] = w1[1]; block2[2] = w1[2]; block2[3] = w1[3]; - u32x block3[4]; + u32 block3[4]; block3[0] = w2[0]; block3[1] = w2[1]; block3[2] = block_len * 8; block3[3] = 0; - append_0x80_4 (block0, block1, block2, block3, block_len); + append_0x80_4x4 (block0, block1, block2, block3, block_len); /** * init @@ -355,12 +338,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00400_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m00500.cl b/OpenCL/m00500.cl similarity index 96% rename from amd/m00500.cl rename to OpenCL/m00500.cl index 9253890..ccce640 100644 --- a/amd/m00500.cl +++ b/OpenCL/m00500.cl @@ -8,66 +8,45 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define 
VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define md5crypt_magic 0x00243124 -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = 0; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -143,7 +122,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, 
const u32x append[4]) +static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -578,7 +557,7 @@ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block } } -static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) +static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -1025,7 +1004,7 @@ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x b } } -static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) +static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2]) { switch (block_len) { @@ -1358,7 +1337,7 @@ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3 } } -static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) +static void append_sign (u32 block0[4], u32 block1[4], const u32 block_len) { switch (block_len) { @@ -1440,7 +1419,7 @@ static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) } } -static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) +static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append) { switch (block_len) { @@ -1684,7 +1663,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1713,28 +1692,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__gl u32 block_len = pw_len; - u32x block0[4]; + u32 block0[4]; block0[0] = w0[0]; 
block0[1] = w0[1]; block0[2] = w0[2]; block0[3] = w0[3]; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -1749,11 +1728,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__gl block_len += pw_len; - append_0x80_4 (block0, block1, block2, block3, block_len); + append_0x80_4x4 (block0, block1, block2, block3, block_len); block3[2] = block_len * 8; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -1808,7 +1787,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__gl /* Then something really weird... */ - u32x append = block0[0] & 0xFF; + u32 append = block0[0] & 0xFF; for (u32 j = pw_len; j; j >>= 1) { @@ -1820,7 +1799,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_init (__gl block_len++; } - append_0x80_4 (block0, block1, block2, block3, block_len); + append_0x80_4x4 (block0, block1, block2, block3, block_len); block3[2] = block_len * 8; @@ -1847,7 +1826,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1856,14 +1835,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_loop (__gl const u32 pw_len = pws[gid].pw_len; - u32x w0_x80[4]; + u32 w0_x80[4]; w0_x80[0] = w0[0]; w0_x80[1] = w0[1]; w0_x80[2] = w0[2]; w0_x80[3] = w0[3]; - append_0x80_1 (w0_x80, pw_len); + append_0x80_1x4 (w0_x80, pw_len); /** * salt @@ -1880,7 +1859,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_loop (__gl * digest */ - u32x digest[4]; + u32 digest[4]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -1895,28 +1874,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00500_loop (__gl u32 block_len; - u32x block0[4]; + u32 block0[4]; block0[0] = 0; block0[1] = 0; block0[2] = 0; block0[3] = 0; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -2041,12 +2020,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00500_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m00900_a0.cl b/OpenCL/m00900_a0.cl similarity index 92% rename from amd/m00900_a0.cl rename to OpenCL/m00900_a0.cl index cb656ce..e6b6ad0 100644 --- a/amd/m00900_a0.cl +++ b/OpenCL/m00900_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -118,10 +94,10 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo w3[2] = out_len * 8; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -174,12 +150,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -207,14 +183,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -241,28 +217,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -275,10 +251,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo w3[2] = out_len * 8; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, 
a, b, c, w0[1], MD4C00, MD4S01); @@ -331,12 +307,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00900_a1.cl b/OpenCL/m00900_a1.cl similarity index 92% rename from amd/m00900_a1.cl rename to OpenCL/m00900_a1.cl index 106bdfe..c4d3a3e 100644 --- a/amd/m00900_a1.cl +++ b/OpenCL/m00900_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -140,38 +116,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, 
c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -224,12 +200,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -257,28 +233,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -349,38 +325,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP 
(MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -433,12 +409,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m00900_a3.cl b/OpenCL/m00900_a3.cl similarity index 70% rename from amd/m00900_a3.cl rename to OpenCL/m00900_a3.cl index f41c260..0b3c522 100644 --- a/amd/m00900_a3.cl +++ b/OpenCL/m00900_a3.cl @@ -4,46 +4,21 @@ */ #define _MD4_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define MD4_STEP_REV(f,a,b,c,d,x,t,s) \ { \ @@ -60,7 +35,7 @@ a -= t; \ } -static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -128,20 +103,18 @@ static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD4M_A; - 
u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -194,16 +167,16 @@ static void m00900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -305,8 +278,8 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21); MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20); - const u32x sav_c = c_rev; - const u32x sav_d = d_rev; + const u32 sav_c = c_rev; + const u32 sav_d = d_rev; MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13); MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12); @@ -315,28 +288,26 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x pre_a = a_rev; - u32x pre_b = b_rev; - u32x pre_c = c_rev; + u32 pre_a = a_rev; + u32 pre_b = b_rev; + u32 pre_c = c_rev; pre_a = pre_a - w0; pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a); pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b); - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -399,16 +370,16 @@ static void m00900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); MD4_STEP0(MD4_H , b, c, d, a, 
H_wfc02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -446,7 +417,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m04 (__glo m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -484,7 +455,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m08 (__glo m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m00900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -522,7 +493,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_m16 (__glo m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -560,7 +531,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s04 (__glo m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -598,7 +569,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s08 (__glo m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m00900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01000_a0.cl b/OpenCL/m01000_a0.cl similarity index 91% rename from amd/m01000_a0.cl rename to OpenCL/m01000_a0.cl index 4d412ff..b5ca3b4 100644 --- a/amd/m01000_a0.cl +++ b/OpenCL/m01000_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const 
u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -116,22 +92,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -184,12 +160,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - 
const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -217,14 +193,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -251,28 +227,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -283,22 +259,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -351,12 +327,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + 
const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01000_a1.cl b/OpenCL/m01000_a1.cl similarity index 91% rename from amd/m01000_a1.cl rename to OpenCL/m01000_a1.cl index 3e6168a..2079407 100644 --- a/amd/m01000_a1.cl +++ b/OpenCL/m01000_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -140,50 +116,50 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -236,12 +212,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, 
MD4S22); MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -269,28 +245,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -361,50 +337,50 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -457,12 +433,12 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01000_a3.cl b/OpenCL/m01000_a3.cl similarity index 70% rename from amd/m01000_a3.cl rename to OpenCL/m01000_a3.cl index cf5ea40..75b40ae 100644 --- a/amd/m01000_a3.cl +++ b/OpenCL/m01000_a3.cl @@ -4,46 +4,21 @@ */ #define _MD4_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define MD4_STEP_REV(f,a,b,c,d,x,t,s) \ { \ @@ -60,7 +35,7 @@ a -= t; \ } -static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, 
__global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -128,22 +103,20 @@ static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + 
u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -196,16 +169,16 @@ static void m01000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22); MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -307,8 +280,8 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21); MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20); - const u32x sav_c = c_rev; - const u32x sav_d = d_rev; + const u32 sav_c = c_rev; + const u32 sav_d = d_rev; MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13); MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12); @@ -317,30 +290,28 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x pre_a = a_rev; - u32x pre_b = b_rev; - u32x pre_c = c_rev; + u32 pre_a = a_rev; + u32 pre_b = b_rev; + u32 pre_c = c_rev; pre_a = pre_a - w0; pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a); pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b); - u32x tmp2; + u32 tmp2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -403,16 +374,16 @@ static void m01000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22); MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const 
u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) { /** * base @@ -450,7 +421,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m04 (__glo m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t 
*plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -488,7 +459,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m08 (__glo m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m16 (__global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -526,7 +497,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_m16 (__glo m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 
*d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -564,7 +535,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s04 (__glo m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -602,7 +573,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s08 (__glo m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) { /** * base diff --git a/amd/m01100_a0.cl b/OpenCL/m01100_a0.cl similarity index 94% rename from amd/m01100_a0.cl rename to OpenCL/m01100_a0.cl index c10e447..4fb454f 100644 --- a/amd/m01100_a0.cl +++ b/OpenCL/m01100_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, 
const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -111,28 +87,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -143,20 +119,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -287,12 +263,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include 
VECT_COMPARE_M + #include COMPARE_M } } @@ -320,14 +296,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -381,28 +357,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -413,20 +389,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -562,12 +538,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01100_a1.cl b/OpenCL/m01100_a1.cl 
similarity index 94% rename from amd/m01100_a1.cl rename to OpenCL/m01100_a1.cl index 87d01f7..3cb6f66 100644 --- a/amd/m01100_a1.cl +++ b/OpenCL/m01100_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 
+36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -167,48 +143,48 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -339,12 +315,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = 
d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -372,28 +348,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -491,48 +467,48 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -668,12 +644,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 
= a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01100_a3.cl b/OpenCL/m01100_a3.cl similarity index 74% rename from amd/m01100_a3.cl rename to OpenCL/m01100_a3.cl index 1c79c6f..09330ba 100644 --- a/amd/m01100_a3.cl +++ b/OpenCL/m01100_a3.cl @@ -4,48 +4,23 @@ */ #define _MD4_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 
*d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -140,20 +115,18 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -211,10 +184,10 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += MD4M_C; d += MD4M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; 
- u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = a; w0_t[1] = b; @@ -289,16 +262,16 @@ static void m01100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, 
__global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -405,20 +378,18 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); @@ -476,10 +447,10 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += MD4M_C; d += MD4M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = a; w0_t[1] = b; @@ -559,16 +530,16 @@ static void m01100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 
*bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -606,7 +577,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m04 (__glo m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -644,11 +615,11 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01100_m08 (__glo m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global 
salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -686,7 +657,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s04 (__glo m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, 
__global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -724,6 +695,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s08 (__glo m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m01400_a0.cl b/OpenCL/m01400_a0.cl similarity index 90% rename from amd/m01400_a0.cl rename to OpenCL/m01400_a0.cl index 37e3738..6f71e37 100644 --- a/amd/m01400_a0.cl +++ b/OpenCL/m01400_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 
-#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -120,31 
+96,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo * SHA256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = 0; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -215,12 +191,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -248,14 +224,14 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -282,28 +258,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -318,31 +294,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo * SHA256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = 0; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; 
+ u32 wf_t = out_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -413,12 +389,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01400_a1.cl b/OpenCL/m01400_a1.cl similarity index 90% rename from amd/m01400_a1.cl rename to OpenCL/m01400_a1.cl index 98a03ae..54893a9 100644 --- a/amd/m01400_a1.cl +++ b/OpenCL/m01400_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__global 
pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -142,10 +118,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -168,31 +144,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo * SHA256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); 
- u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -263,12 +239,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ 
-296,28 +272,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -390,10 +366,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -416,31 +392,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo * SHA256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + 
u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -511,12 +487,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01400_a3.cl b/OpenCL/m01400_a3.cl similarity index 72% rename from amd/m01400_a3.cl rename to OpenCL/m01400_a3.cl index 5c02900..1739727 100644 --- a/amd/m01400_a3.cl +++ b/OpenCL/m01400_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA256_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define 
VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -58,41 +33,39 @@ static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -163,16 +136,16 @@ static void m01400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const 
u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -197,41 +170,39 @@ static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - 
const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); + u32 w0l = w[0]; - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -301,17 +272,16 @@ static void m01400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - 
const u32x r3 = g; - - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -349,7 +319,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01400_m04 (__glo m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global 
salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -387,7 +357,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m08 (__glo m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, 
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -425,7 +395,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_m16 (__glo m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -463,7 +433,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s04 (__glo m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, 
__global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -501,7 +471,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s08 (__glo m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01400_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01410_a0.cl b/OpenCL/m01410_a0.cl 
similarity index 90% rename from amd/m01410_a0.cl rename to OpenCL/m01410_a0.cl index 84702e7..242071d 100644 --- a/amd/m01410_a0.cl +++ b/OpenCL/m01410_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -196,31 +172,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround 
(w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -291,12 +267,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -324,14 +300,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -378,28 +354,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x 
w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -470,31 +446,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -565,12 +541,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01410_a1.cl b/OpenCL/m01410_a1.cl similarity index 91% rename from amd/m01410_a1.cl rename to OpenCL/m01410_a1.cl index 5607b5b..c421dd6 100644 --- a/amd/m01410_a1.cl +++ b/OpenCL/m01410_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, 
__global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -194,10 +170,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; @@ -222,31 +198,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; 
- u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -317,12 +293,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -350,28 +326,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 
0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -496,10 +472,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; @@ -524,31 +500,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 
g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -619,12 +595,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01410_a3.cl b/OpenCL/m01410_a3.cl similarity index 73% rename from amd/m01410_a3.cl rename to OpenCL/m01410_a3.cl index 855a770..c18442e 100644 --- a/amd/m01410_a3.cl +++ b/OpenCL/m01410_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA256_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void 
*tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -115,41 +90,39 @@ static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x 
w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -220,16 +193,16 @@ static void m01410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -254,41 +227,39 @@ static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = 
w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -359,16 +330,16 @@ static void m01410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 
*hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -406,7 +377,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m04 (__glo m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global 
u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -444,7 +415,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m08 (__glo m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -482,7 +453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_m16 (__glo m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -520,7 +491,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s04 (__glo m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01410_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -558,7 +529,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s08 (__glo m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01410_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01420_a0.cl b/OpenCL/m01420_a0.cl similarity index 90% rename from amd/m01420_a0.cl rename to OpenCL/m01420_a0.cl index 6dc2df5..71e7f4f 100644 --- a/amd/m01420_a0.cl +++ b/OpenCL/m01420_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include 
"rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; 
w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -157,31 +133,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, 
g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -252,12 +228,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -285,14 +261,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -339,28 +315,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -392,31 +368,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); 
- u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -487,12 +463,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01420_a1.cl b/OpenCL/m01420_a1.cl similarity index 91% rename from amd/m01420_a1.cl rename to OpenCL/m01420_a1.cl index b97c9dd..f2d3658 100644 --- a/amd/m01420_a1.cl +++ b/OpenCL/m01420_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - 
-#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + 
u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,10 +128,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -197,31 +173,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = 
swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -292,12 +268,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -325,28 +301,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -429,10 +405,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -474,31 +450,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround 
(w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -569,12 +545,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = 
g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01420_a3.cl b/OpenCL/m01420_a3.cl similarity index 86% rename from amd/m01420_a3.cl rename to OpenCL/m01420_a3.cl index 5d3b039..33ac15f 100644 --- a/amd/m01420_a3.cl +++ b/OpenCL/m01420_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const 
u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -93,7 +69,7 @@ static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,10 +81,10 @@ static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; w0_t2[0] = swap_workaround (w0[0]); w0_t2[1] = swap_workaround (w0[1]); @@ -150,31 +126,31 @@ static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = 
swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -245,16 +221,16 @@ static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -315,7 +291,7 @@ static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -327,10 +303,10 @@ static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x 
w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; w0_t2[0] = swap_workaround (w0[0]); w0_t2[1] = swap_workaround (w0[1]); @@ -372,31 +348,31 @@ static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, 
f, g, w1_t, SHA256C01); @@ -467,12 +443,12 @@ static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -486,28 +462,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -533,28 +509,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -580,28 +556,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -627,28 
+603,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -674,28 +650,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -721,28 +697,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01420_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01430_a0.cl b/OpenCL/m01430_a0.cl similarity index 90% rename from amd/m01430_a0.cl rename to OpenCL/m01430_a0.cl index 7665ea2..483899d 100644 --- a/amd/m01430_a0.cl +++ b/OpenCL/m01430_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define 
DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ 
-104,28 +80,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -170,10 +146,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -201,31 +177,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = 
swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -296,12 +272,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -329,14 +305,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -383,28 +359,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - 
u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -449,10 +425,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -480,31 +456,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 
c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -575,12 +551,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01430_a1.cl b/OpenCL/m01430_a1.cl similarity index 90% rename from amd/m01430_a1.cl rename to OpenCL/m01430_a1.cl index 32df538..2b32650 100644 --- a/amd/m01430_a1.cl +++ b/OpenCL/m01430_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global 
bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,10 +128,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -210,10 +186,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, 
w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -241,31 +217,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -336,12 +312,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m01430_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -369,28 +345,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -473,10 +449,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -531,10 +507,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -562,31 +538,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround 
(w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -657,12 +633,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01430_a3.cl 
b/OpenCL/m01430_a3.cl similarity index 73% rename from amd/m01430_a3.cl rename to OpenCL/m01430_a3.cl index cfac03e..807993f 100644 --- a/amd/m01430_a3.cl +++ b/OpenCL/m01430_a3.cl @@ -4,48 +4,23 @@ */ #define _SHA256_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset) +static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -115,41 +90,39 @@ static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; 
+ u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -220,16 +193,16 @@ static void m01430m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, 
__global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -254,41 +227,39 @@ static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + const u32 w0r = words_buf_r[il_pos]; + + const u32 w0 = w0l | w0r; + + u32 w0_t = w0; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; + + u32 a = 
SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -359,16 +330,16 @@ static void m01430s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 
*bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -406,7 +377,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m04 (__glo m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -444,7 +415,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m08 (__glo m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -482,7 +453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_m16 (__glo m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -520,7 +491,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s04 (__glo m01430s (w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -558,7 +529,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s08 (__glo m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01430_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, 
__global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01440_a0.cl b/OpenCL/m01440_a0.cl similarity index 89% rename from amd/m01440_a0.cl rename to OpenCL/m01440_a0.cl index b53806a..d6d95c2 100644 --- a/amd/m01440_a0.cl +++ b/OpenCL/m01440_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -104,10 +80,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -134,10 +110,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -159,31 +135,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t 
= swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -254,12 +230,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = 
d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -287,14 +263,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -341,10 +317,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -371,10 +347,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -396,31 +372,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e 
= SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = out_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -491,12 +467,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01440_a1.cl b/OpenCL/m01440_a1.cl similarity index 90% rename from amd/m01440_a1.cl rename to OpenCL/m01440_a1.cl index 18530b6..ef5088e 100644 --- a/amd/m01440_a1.cl +++ b/OpenCL/m01440_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include 
"include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] 
= 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,10 +128,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -180,10 +156,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -205,31 +181,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = 
swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -300,12 +276,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -333,28 +309,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -437,10 +413,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; 
w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -465,10 +441,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; make_unicode (w0, w0_t2, w1_t2); make_unicode (w1, w2_t2, w3_t2); @@ -490,31 +466,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = 
SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -585,12 +561,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01440_a3.cl b/OpenCL/m01440_a3.cl similarity index 86% rename from amd/m01440_a3.cl rename to OpenCL/m01440_a3.cl index 9c8bb0a..3756b4e 100644 --- a/amd/m01440_a3.cl +++ b/OpenCL/m01440_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, 
__global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01440m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -93,7 +69,7 @@ static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,10 +81,10 @@ static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t2[4]; - 
u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; w0_t2[0] = swap_workaround (w0[0]); w0_t2[1] = swap_workaround (w0[1]); @@ -150,31 +126,31 @@ static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -245,16 +221,16 @@ static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01440s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -315,7 +291,7 @@ static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -327,10 +303,10 @@ static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; + u32 w0_t2[4]; + u32 w1_t2[4]; + u32 w2_t2[4]; + u32 w3_t2[4]; w0_t2[0] = swap_workaround (w0[0]); w0_t2[1] = swap_workaround (w0[1]); @@ -372,31 +348,31 @@ static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha256 */ - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; + u32 w0_t = swap_workaround (w0_t2[0]); + u32 w1_t = swap_workaround (w0_t2[1]); + u32 w2_t = swap_workaround (w0_t2[2]); + u32 w3_t = swap_workaround (w0_t2[3]); + u32 w4_t = swap_workaround (w1_t2[0]); + u32 w5_t = 
swap_workaround (w1_t2[1]); + u32 w6_t = swap_workaround (w1_t2[2]); + u32 w7_t = swap_workaround (w1_t2[3]); + u32 w8_t = swap_workaround (w2_t2[0]); + u32 w9_t = swap_workaround (w2_t2[1]); + u32 wa_t = swap_workaround (w2_t2[2]); + u32 wb_t = swap_workaround (w2_t2[3]); + u32 wc_t = swap_workaround (w3_t2[0]); + u32 wd_t = swap_workaround (w3_t2[1]); + u32 we_t = 0; + u32 wf_t = pw_salt_len * 8; + + u32 a = SHA256M_A; + u32 b = SHA256M_B; + u32 c = SHA256M_C; + u32 d = SHA256M_D; + u32 e = SHA256M_E; + u32 f = SHA256M_F; + u32 g = SHA256M_G; + u32 h = SHA256M_H; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); @@ -467,12 +443,12 @@ static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -486,28 +462,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -533,28 +509,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - 
u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -580,28 +556,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -627,28 +603,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -674,28 +650,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -721,28 +697,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01440_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = 
pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01450_a0.cl b/OpenCL/m01450_a0.cl similarity index 88% rename from amd/m01450_a0.cl rename to OpenCL/m01450_a0.cl index 794c054..a18fbe6 100644 --- a/amd/m01450_a0.cl +++ b/OpenCL/m01450_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -66,33 +42,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; 
- u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -152,7 +128,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -211,7 +187,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -269,14 +245,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -311,28 +287,28 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -345,36 +321,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -395,16 +371,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -432,14 +408,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = 
pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -486,28 +462,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -520,36 +496,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -570,16 +546,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01450_a1.cl b/OpenCL/m01450_a1.cl similarity index 89% rename from amd/m01450_a1.cl rename to OpenCL/m01450_a1.cl index 
9ca7e33..6bbdbe8 100644 --- a/amd/m01450_a1.cl +++ b/OpenCL/m01450_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -64,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = 
digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -150,7 +126,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -209,7 +185,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -267,28 +243,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -365,28 +341,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | 
wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -397,36 +373,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -447,16 +423,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -484,28 +460,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 
pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -594,28 +570,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -626,36 +602,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -676,16 +652,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; 
hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01450_a3.cl b/OpenCL/m01450_a3.cl similarity index 83% rename from amd/m01450_a3.cl rename to OpenCL/m01450_a3.cl index 264778d..a9f623f 100644 --- a/amd/m01450_a3.cl +++ b/OpenCL/m01450_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -64,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - 
u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -150,7 +126,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -209,7 +185,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -251,7 +227,7 @@ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, digest); } -static void m01450m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, 
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01450m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -284,7 +260,7 @@ static void m01450m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -296,36 +272,36 @@ static void m01450m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = 
w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -346,20 +322,20 @@ static void m01450m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01450s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01450s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -404,7 +380,7 @@ static void m01450s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -416,36 +392,36 @@ static void m01450s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -466,16 +442,16 @@ static void m01450s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + salt_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const 
u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -489,28 +465,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -536,28 +512,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -583,28 +559,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -630,28 +606,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 
0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -677,28 +653,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -724,28 +700,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01450_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01460_a0.cl b/OpenCL/m01460_a0.cl similarity index 88% rename from amd/m01460_a0.cl rename to OpenCL/m01460_a0.cl index a41e1d3..99e952e 100644 --- a/amd/m01460_a0.cl +++ b/OpenCL/m01460_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -66,33 +42,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -152,7 +128,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 
w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -211,7 +187,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -269,14 +245,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -307,36 +283,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -346,28 +322,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 
pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -395,16 +371,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + out_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -432,14 +408,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -470,36 +446,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -521,28 +497,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo for (u32 
il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -570,16 +546,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + out_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01460_a1.cl b/OpenCL/m01460_a1.cl similarity index 89% rename from amd/m01460_a1.cl rename to OpenCL/m01460_a1.cl index 3306e48..0d902e2 100644 --- a/amd/m01460_a1.cl +++ b/OpenCL/m01460_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M 
"check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -64,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -150,7 +126,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -209,7 +185,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], 
u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -267,28 +243,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -324,36 +300,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -400,28 +376,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = 
wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -447,16 +423,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -484,28 +460,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -541,36 +517,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; 
w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -629,28 +605,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -676,16 +652,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01460_a3.cl b/OpenCL/m01460_a3.cl similarity index 83% rename from amd/m01460_a3.cl rename to OpenCL/m01460_a3.cl index ba250a2..81149df 100644 --- a/amd/m01460_a3.cl +++ b/OpenCL/m01460_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" 
-#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -64,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -150,7 +126,7 @@ 
static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -209,7 +185,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -251,7 +227,7 @@ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, digest); } -static void m01460m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01460m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global 
bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -282,36 +258,36 @@ static void m01460m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -319,7 +295,7 @@ static void m01460m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -344,20 +320,20 @@ static void m01460m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - 
const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01460s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01460s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset) { /** * modifier @@ -388,36 +364,36 @@ static void m01460s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -437,7 +413,7 @@ static void m01460s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -462,16 +438,16 @@ static void m01460s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + pw_len) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -485,28 +461,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] 
= 0; w3[1] = 0; @@ -532,28 +508,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -579,28 +555,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -626,28 +602,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -673,28 +649,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + 
u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -720,28 +696,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01460_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01500_a0.cl b/OpenCL/m01500_a0.cl similarity index 93% rename from amd/m01500_a0.cl rename to OpenCL/m01500_a0.cl index 709fec8..dba48ba 100644 --- a/amd/m01500_a0.cl +++ b/OpenCL/m01500_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -361,9 +337,9 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], 
(S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -388,13 +364,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -403,12 +379,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -420,20 +396,20 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void 
_des_crypt_encrypt (u32 iv[2], u32 mask, u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { const u32 E1 = (mask >> 2) & 0x3f0; const u32 E0 = mask & 0x3f; - u32x r = 0; - u32x l = 0; + u32 r = 0; + u32 l = 0; for (u32 i = 0; i < 25; i++) { for (u32 j = 0; j < 16; j += 2) { - u32x t; - u32x u; + u32 t; + u32 u; t = r ^ (r >> 16); u = t & E0; @@ -478,7 +454,7 @@ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); } - u32x tt; + u32 tt; tt = l; l = r; @@ -503,7 +479,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -553,28 +529,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -585,26 +561,26 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_m04 (__glo out_len = (out_len >= 8) ? 
8 : out_len; - u32x data[2]; + u32 data[2]; data[0] = (w0[0] << 1) & 0xfefefefe; data[1] = (w0[1] << 1) & 0xfefefefe; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -630,7 +606,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -692,28 +668,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -724,26 +700,26 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_s04 (__glo out_len = (out_len >= 8) ? 
8 : out_len; - u32x data[2]; + u32 data[2]; data[0] = (w0[0] << 1) & 0xfefefefe; data[1] = (w0[1] << 1) & 0xfefefefe; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01500_a1.cl b/OpenCL/m01500_a1.cl similarity index 93% rename from amd/m01500_a1.cl rename to OpenCL/m01500_a1.cl index cdf366e..d265a41 100644 --- a/amd/m01500_a1.cl +++ b/OpenCL/m01500_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -359,9 +335,9 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) 
+static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -386,13 +362,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -401,12 +377,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -418,20 +394,20 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 mask, u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { const u32 E1 = (mask >> 2) & 0x3f0; const u32 E0 = mask & 
0x3f; - u32x r = 0; - u32x l = 0; + u32 r = 0; + u32 l = 0; for (u32 i = 0; i < 25; i++) { for (u32 j = 0; j < 16; j += 2) { - u32x t; - u32x u; + u32 t; + u32 u; t = r ^ (r >> 16); u = t & E0; @@ -476,7 +452,7 @@ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], ^ BOX (amd_bfe (t, 24, 6), 7, s_SPtrans); } - u32x tt; + u32 tt; tt = l; l = r; @@ -501,28 +477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -617,54 +593,54 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x data[2]; + u32 data[2]; data[0] = (w0[0] << 1) & 0xfefefefe; data[1] = (w0[1] << 1) & 0xfefefefe; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -690,28 +666,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m01500_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -818,54 +794,54 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x data[2]; + u32 data[2]; data[0] = (w0[0] << 1) & 0xfefefefe; data[1] = (w0[1] << 1) & 0xfefefefe; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01500_a3.cl b/OpenCL/m01500_a3.cl similarity index 98% rename from amd/m01500_a3.cl rename to OpenCL/m01500_a3.cl index cc10239..8e3a5f7 100644 --- a/amd/m01500_a3.cl +++ b/OpenCL/m01500_a3.cl @@ -5,45 +5,32 @@ */ #define _DES_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include 
"include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect1_comp4_bs.c" +#define COMPARE_M "check_multi_vect1_comp4_bs.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect2_comp4_bs.c" +#define COMPARE_M "check_multi_vect2_comp4_bs.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect4_comp4_bs.c" +#define COMPARE_M "check_multi_vect4_comp4_bs.c" #endif #define KXX_DECL volatile @@ -980,7 +967,7 @@ static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * * inner loop */ - const u32 bf_loops = bfs_cnt; + const u32 bfs_cnt = bfs_cnt; const u32 pc_pos = get_local_id (1); @@ -1226,12 +1213,12 @@ static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * const u32 slice = 31 - clz (~tmpResult); - const u32x r0 = search[0]; - const u32x r1 = search[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } else @@ -1252,12 +1239,12 @@ static void m01500m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * #pragma unroll for (int slice = 0; slice < 32; slice++) { - const u32x r0 = out0[31 - slice]; - const u32x r1 = out1[31 - slice]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = out0[31 - slice]; + const u32 r1 = out1[31 - slice]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -1633,7 +1620,7 @@ static void m01500s (__local u32 
*s_S, __global pw_t *pws, __global gpu_rule_t * const u32 slice = 31 - clz (~tmpResult); - #include VECT_COMPARE_S + #include COMPARE_S } // diff --git a/amd/m01600.cl b/OpenCL/m01600.cl similarity index 96% rename from amd/m01600.cl rename to OpenCL/m01600.cl index 50e8e06..fef28b4 100644 --- a/amd/m01600.cl +++ b/OpenCL/m01600.cl @@ -8,63 +8,51 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif #define md5apr1_magic0 0x72706124 #define md5apr1_magic1 0x00002431 -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = 
w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = 0; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -140,7 +128,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) +static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -575,7 +563,7 @@ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block } } -static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) +static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -1022,7 +1010,7 @@ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x b } } -static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) +static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2]) { switch (block_len) { @@ -1355,7 +1343,7 @@ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3 } } -static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) +static void append_sign (u32 block0[4], u32 block1[4], const u32 block_len) { switch (block_len) { @@ -1453,7 +1441,7 @@ static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) } } -static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) +static void append_1st (u32 block0[4], u32 
block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append) { switch (block_len) { @@ -1697,7 +1685,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1726,28 +1714,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__gl u32 block_len = pw_len; - u32x block0[4]; + u32 block0[4]; block0[0] = w0[0]; block0[1] = w0[1]; block0[2] = w0[2]; block0[3] = w0[3]; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -1766,7 +1754,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__gl block3[2] = block_len * 8; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -1821,7 +1809,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_init (__gl /* Then something really weird... 
*/ - u32x append = block0[0] & 0xFF; + u32 append = block0[0] & 0xFF; for (u32 j = pw_len; j; j >>= 1) { @@ -1860,7 +1848,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1869,7 +1857,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__gl const u32 pw_len = pws[gid].pw_len; - u32x w0_x80[4]; + u32 w0_x80[4]; w0_x80[0] = w0[0]; w0_x80[1] = w0[1]; @@ -1893,7 +1881,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__gl * digest */ - u32x digest[4]; + u32 digest[4]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -1908,28 +1896,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_loop (__gl u32 block_len; - u32x block0[4]; + u32 block0[4]; block0[0] = 0; block0[1] = 0; block0[2] = 0; block0[3] = 0; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -2054,12 +2042,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01600_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m01700_a0.cl b/OpenCL/m01700_a0.cl similarity index 86% rename from amd/m01700_a0.cl rename to OpenCL/m01700_a0.cl index 83b91a0..2dcd4f4 100644 --- a/amd/m01700_a0.cl +++ b/OpenCL/m01700_a0.cl @@ 
-8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 
(w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -205,28 +181,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -241,10 +217,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo * SHA512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -263,7 +239,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo w3_t[2] = 0; w3_t[3] = out_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; 
digest[1] = SHA512M_B; @@ -277,12 +253,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -310,14 +286,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -344,28 +320,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -380,10 +356,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo * SHA512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -402,7 +378,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo w3_t[2] = 0; w3_t[3] = out_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -416,12 +392,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01700_a1.cl b/OpenCL/m01700_a1.cl similarity index 88% rename from amd/m01700_a1.cl rename to OpenCL/m01700_a1.cl index 0733c78..f0c7611 100644 --- a/amd/m01700_a1.cl +++ b/OpenCL/m01700_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x 
w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -263,10 +239,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo switch_buffer_by_offset 
(wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -289,10 +265,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo * SHA512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -311,7 +287,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -325,12 +301,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -358,28 +334,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -452,10 +428,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo switch_buffer_by_offset 
(wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -478,10 +454,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo * SHA512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -500,7 +476,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -514,12 +490,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01700_a3.cl b/OpenCL/m01700_a3.cl similarity index 59% rename from amd/m01700_a3.cl rename to OpenCL/m01700_a3.cl index 0e965b1..bf9f884 100644 --- a/amd/m01700_a3.cl +++ b/OpenCL/m01700_a3.cl @@ -4,46 +4,21 @@ */ #define _SHA512_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" 
-#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -69,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + 
u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -166,7 +141,7 @@ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = h; } -static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ 
-179,20 +154,18 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -211,7 +184,7 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -225,16 +198,16 @@ static void m01700m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, 
__global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -259,20 +232,18 @@ static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -291,7 +262,7 @@ static void m01700s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -305,16 +276,16 @@ static void m01700s (u32 w[16], const u32 
pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -352,7 +323,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m04 (__glo m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void 
*tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -390,7 +361,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m08 (__glo m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -428,7 +399,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_m16 (__glo m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -466,7 +437,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s04 (__glo m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -504,7 +475,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m01700_s08 (__glo m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01700_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01710_a0.cl b/OpenCL/m01710_a0.cl similarity index 88% rename from amd/m01710_a0.cl rename to OpenCL/m01710_a0.cl index 8e49df1..8d85e4c 100644 --- a/amd/m01710_a0.cl +++ b/OpenCL/m01710_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - 
u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -225,28 +201,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + 
u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -317,10 +293,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -339,7 +315,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -353,12 +329,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -386,14 +362,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -440,28 +416,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -532,10 +508,10 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -554,7 +530,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -568,12 +544,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01710_a1.cl b/OpenCL/m01710_a1.cl similarity index 89% rename from amd/m01710_a1.cl rename to OpenCL/m01710_a1.cl index 5dd2053..56c2ba0 100644 --- a/amd/m01710_a1.cl +++ b/OpenCL/m01710_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 
wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -315,10 +291,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; @@ -343,10 +319,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -365,7 +341,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -379,12 +355,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -412,28 +388,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 
wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -558,10 +534,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; @@ -586,10 +562,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -608,7 +584,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -622,12 +598,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01710_a3.cl b/OpenCL/m01710_a3.cl similarity index 62% rename from amd/m01710_a3.cl rename to OpenCL/m01710_a3.cl index c7c6531..5b79726 100644 
--- a/amd/m01710_a3.cl +++ b/OpenCL/m01710_a3.cl @@ -4,46 +4,21 @@ */ #define _SHA512_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -69,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = 
digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -166,7 +141,7 @@ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = h; } -static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global 
u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -236,20 +211,18 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -268,7 +241,7 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -282,16 +255,16 @@ static void m01710m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -316,20 +289,18 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = 
words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -348,7 +319,7 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -362,16 +333,16 @@ static void m01710s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -409,7 +380,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m04 (__glo m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -447,7 +418,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m08 (__glo m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -485,7 +456,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_m16 (__glo m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -523,7 +494,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s04 (__glo m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -561,7 +532,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s08 (__glo m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
+__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01710_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01720_a0.cl b/OpenCL/m01720_a0.cl similarity index 88% rename from amd/m01720_a0.cl rename to OpenCL/m01720_a0.cl index dff8fec..ca77914 100644 --- a/amd/m01720_a0.cl +++ b/OpenCL/m01720_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = 
pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -225,28 +201,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -278,10 +254,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -300,7 +276,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -314,12 +290,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -347,14 +323,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; 
pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -401,28 +377,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -454,10 +430,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -476,7 +452,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -490,12 +466,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01720_a1.cl b/OpenCL/m01720_a1.cl similarity index 89% rename from amd/m01720_a1.cl rename to OpenCL/m01720_a1.cl index 5ae641a..f87bd74 100644 --- a/amd/m01720_a1.cl +++ b/OpenCL/m01720_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 
-#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 
(w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -273,10 +249,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -318,10 +294,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -340,7 +316,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -354,12 +330,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 
1, 1))) m01720_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -387,28 +363,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -491,10 +467,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -536,10 +512,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo * sha512 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -558,7 +534,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -572,12 +548,12 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m01720_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01720_a3.cl b/OpenCL/m01720_a3.cl similarity index 84% rename from amd/m01720_a3.cl rename to OpenCL/m01720_a3.cl index 75b89eb..2a363d4 100644 --- a/amd/m01720_a3.cl +++ b/OpenCL/m01720_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - 
u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -165,7 +141,7 @@ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = h; } -static void m01720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, 
__global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -214,7 +190,7 @@ static void m01720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -226,10 +202,10 @@ static void m01720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -288,7 +264,7 @@ static void m01720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -302,16 +278,16 @@ static void m01720m (u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global 
void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -372,7 +348,7 @@ static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -384,10 +360,10 @@ static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -446,7 +422,7 @@ static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -460,12 +436,12 @@ static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -479,28 +455,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; 
w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -526,28 +502,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -573,28 +549,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -620,28 +596,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -667,28 +643,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 
0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -714,28 +690,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01720_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01730_a0.cl b/OpenCL/m01730_a0.cl similarity index 88% rename from amd/m01730_a0.cl rename to OpenCL/m01730_a0.cl index 321996b..ae3f0e9 100644 --- a/amd/m01730_a0.cl +++ b/OpenCL/m01730_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const 
u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -225,28 +201,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo for (u32 il_pos = 0; il_pos < 
rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -291,10 +267,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -339,7 +315,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -353,12 +329,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -386,14 +362,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -440,28 +416,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { 
- u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -506,10 +482,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -554,7 +530,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -568,12 +544,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01730_a1.cl b/OpenCL/m01730_a1.cl similarity index 89% rename from amd/m01730_a1.cl rename to OpenCL/m01730_a1.cl index e433262..0925b6b 100644 --- a/amd/m01730_a1.cl +++ b/OpenCL/m01730_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include 
"types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = 
hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -273,10 +249,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -331,10 +307,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -379,7 +355,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -393,12 +369,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - 
const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -426,28 +402,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -530,10 +506,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -588,10 +564,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -636,7 +612,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -650,12 +626,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const 
u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01730_a3.cl b/OpenCL/m01730_a3.cl similarity index 62% rename from amd/m01730_a3.cl rename to OpenCL/m01730_a3.cl index e4b8a34..6e69c30 100644 --- a/amd/m01730_a3.cl +++ b/OpenCL/m01730_a3.cl @@ -4,46 +4,21 @@ */ #define _SHA512_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -69,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 
(w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -166,7 +141,7 @@ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = h; } -static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -236,21 +211,19 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -269,7 +242,7 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -283,16 +256,16 @@ static void m01730m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = 
l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -317,20 +290,18 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -349,7 +320,7 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -363,16 +334,16 @@ static void m01730s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, 
__global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -410,7 +381,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m04 (__glo m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m08 (__global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -448,7 +419,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m08 (__glo m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -486,7 +457,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_m16 (__glo m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -524,7 +495,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s04 (__glo m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
+__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -562,7 +533,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s08 (__glo m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t 
*digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01730_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m01740_a0.cl b/OpenCL/m01740_a0.cl similarity index 88% rename from amd/m01740_a0.cl rename to OpenCL/m01740_a0.cl index 3932179..b8cf1d4 100644 --- a/amd/m01740_a0.cl +++ b/OpenCL/m01740_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" 
-#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + 
u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -225,10 +201,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -255,10 +231,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -297,7 +273,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -311,12 +287,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -344,14 +320,14 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -398,10 +374,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -428,10 +404,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo const u32 out_salt_len = (out_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -470,7 +446,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo w3_t[2] = 0; w3_t[3] = out_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -484,12 +460,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01740_a1.cl b/OpenCL/m01740_a1.cl similarity index 89% rename from amd/m01740_a1.cl rename to OpenCL/m01740_a1.cl index 2793077..2e38d54 100644 --- a/amd/m01740_a1.cl +++ b/OpenCL/m01740_a1.cl @@ -8,41 +8,17 @@ #include 
"include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], 
w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -273,10 +249,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -301,10 +277,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -343,7 +319,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; 
digest[1] = SHA512M_B; @@ -357,12 +333,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -390,28 +366,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -494,10 +470,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -522,10 +498,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo const u32 pw_salt_len = (pw_len * 2) + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -564,7 +540,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u64x digest[8]; + u64 
digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -578,12 +554,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01740_a3.cl b/OpenCL/m01740_a3.cl similarity index 84% rename from amd/m01740_a3.cl rename to OpenCL/m01740_a3.cl index 5d5578b..8a53310 100644 --- a/amd/m01740_a3.cl +++ b/OpenCL/m01740_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha512_transform 
(const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -165,7 +141,7 @@ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = h; } -static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global 
plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01740m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -214,7 +190,7 @@ static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -226,10 +202,10 @@ static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -288,7 +264,7 @@ static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - 
u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -302,16 +278,16 @@ static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01740s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 
*bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -372,7 +348,7 @@ static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -384,10 +360,10 @@ static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -446,7 +422,7 @@ static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -460,12 +436,12 @@ static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -479,28 +455,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = 
pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -526,28 +502,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -573,28 +549,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -620,28 +596,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -667,28 +643,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 
w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -714,28 +690,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01740_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01750_a0.cl b/OpenCL/m01750_a0.cl similarity index 88% rename from amd/m01750_a0.cl rename to OpenCL/m01750_a0.cl index e9d036d..d352903 100644 --- a/amd/m01750_a0.cl +++ b/OpenCL/m01750_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 
k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -156,12 +132,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -220,12 +196,12 @@ static void 
hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -300,14 +276,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -342,28 +318,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -376,36 +352,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; 
w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -426,17 +402,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -464,14 +440,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -518,28 +494,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -552,36 +528,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = 
swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -602,17 +578,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01750_a1.cl b/OpenCL/m01750_a1.cl similarity index 89% rename from amd/m01750_a1.cl rename to OpenCL/m01750_a1.cl index 918c882..c714c3a 100644 --- a/amd/m01750_a1.cl +++ b/OpenCL/m01750_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -154,12 +130,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 
w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -218,12 +194,12 @@ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -298,28 +274,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -396,28 +372,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; 
w3[1] = wordl3[1] | wordr3[1]; @@ -428,36 +404,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -478,17 +454,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -516,28 +492,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -626,28 
+602,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -658,36 +634,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -708,17 +684,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 
= h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01750_a3.cl b/OpenCL/m01750_a3.cl similarity index 83% rename from amd/m01750_a3.cl rename to OpenCL/m01750_a3.cl index 2d31fa2..1cbe77a 100644 --- a/amd/m01750_a3.cl +++ b/OpenCL/m01750_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = 
digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -154,12 +130,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -218,12 +194,12 @@ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -282,7 +258,7 @@ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); } -static void m01750m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, 
__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01750m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -315,7 +291,7 @@ static void m01750m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -327,36 +303,36 @@ static void m01750m (u32x w0[4], u32x w1[4], u32x w2[4], u32x 
w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -377,21 +353,21 @@ static void m01750m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01750s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01750s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -436,7 +412,7 @@ static void m01750s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -448,36 +424,36 @@ static void m01750s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -498,17 +474,17 @@ static void m01750s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (128 + salt_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, 
w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -522,28 +498,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -569,28 +545,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -616,28 +592,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -663,28 +639,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m01750_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -710,28 +686,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -757,28 +733,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01750_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01760_a0.cl b/OpenCL/m01760_a0.cl similarity index 88% rename from amd/m01760_a0.cl rename to OpenCL/m01760_a0.cl index 8fef412..e37a0af 100644 --- a/amd/m01760_a0.cl +++ b/OpenCL/m01760_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include 
"include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + 
u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -156,12 +132,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -220,12 +196,12 @@ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -300,14 +276,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -338,36 +314,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); 
w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -377,28 +353,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -426,17 +402,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo w3_t[2] = 0; w3_t[3] = (128 + out_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -464,14 +440,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -502,36 +478,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m01760_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -553,28 +529,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -602,17 +578,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo w3_t[2] = 0; w3_t[3] = (128 + out_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01760_a1.cl b/OpenCL/m01760_a1.cl similarity index 89% rename from amd/m01760_a1.cl rename to OpenCL/m01760_a1.cl index ff8e9c2..ab136a0 100644 --- 
a/amd/m01760_a1.cl +++ b/OpenCL/m01760_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t 
= w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -154,12 +130,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -218,12 +194,12 @@ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -298,28 +274,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; 
wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -355,36 +331,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -431,28 +407,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -478,17 +454,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo w3_t[2] = 0; w3_t[3] = (128 + pw_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = 
h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -516,28 +492,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -573,36 +549,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -661,28 +637,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | 
wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -708,17 +684,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo w3_t[2] = 0; w3_t[3] = (128 + pw_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m01760_a3.cl b/OpenCL/m01760_a3.cl similarity index 83% rename from amd/m01760_a3.cl rename to OpenCL/m01760_a3.cl index 9a1e0ea..36e6ac6 100644 --- a/amd/m01760_a3.cl +++ b/OpenCL/m01760_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 14 #define DGST_R1 15 #define DGST_R2 6 #define DGST_R3 7 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha512[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t = w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -154,12 +130,12 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], 
w0[1]) ^ 0x3636363636363636; w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; @@ -218,12 +194,12 @@ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); } -static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; + u64 w0_t[4]; + u64 w1_t[4]; + u64 w2_t[4]; + u64 w3_t[4]; w0_t[0] = hl32_to_64 (w0[0], w0[1]); w0_t[1] = hl32_to_64 (w0[2], w0[3]); @@ -282,7 +258,7 @@ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64 sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); } -static void m01760m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01760m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global 
u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esal_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -313,36 +289,36 @@ static void m01760m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -350,7 +326,7 @@ static void m01760m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -375,21 +351,21 @@ static void m01760m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (128 + pw_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); 
- const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m01760s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m01760s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -420,36 +396,36 @@ static void m01760s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (salt_buf0[0]); w0_t[1] = swap_workaround (salt_buf0[1]); w0_t[2] = swap_workaround (salt_buf0[2]); w0_t[3] = swap_workaround (salt_buf0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (salt_buf1[0]); w1_t[1] = swap_workaround (salt_buf1[1]); w1_t[2] = swap_workaround (salt_buf1[2]); w1_t[3] = swap_workaround (salt_buf1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -469,7 +445,7 @@ static void m01760s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -494,17 +470,17 @@ static void m01760s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (128 + pw_len) * 8; - u64x digest[8]; + u64 digest[8]; hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); + const u32 r0 = l32_from_64 (digest[7]); + const u32 r1 = h32_from_64 (digest[7]); + const u32 r2 = l32_from_64 (digest[3]); + const u32 r3 = h32_from_64 (digest[3]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -518,28 +494,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; 
w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -565,28 +541,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -612,28 +588,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -659,28 +635,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -706,28 +682,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = 
pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -753,28 +729,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01760_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m01800.cl b/OpenCL/m01800.cl similarity index 89% rename from amd/m01800.cl rename to OpenCL/m01800.cl index a65ed52..ae3678f 100644 --- a/amd/m01800.cl +++ b/OpenCL/m01800.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #define PUTCHAR64_BE(a,p,c) ((u8 *)(a))[(p) ^ 7] = (u8) (c) @@ -38,8 +26,8 @@ typedef struct { - u64x state[8]; - u64x buf[16]; + u64 state[8]; + u64 buf[16]; int len; } sha512_ctx_t; @@ -68,33 +56,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w[16], u64x digest[8]) +static void sha512_transform (const u64 w[16], u64 digest[8]) { - u64x w0_t = w[ 0]; - u64x w1_t = w[ 1]; - u64x w2_t = w[ 2]; - u64x w3_t = w[ 3]; - u64x w4_t = w[ 4]; - u64x w5_t 
= w[ 5]; - u64x w6_t = w[ 6]; - u64x w7_t = w[ 7]; - u64x w8_t = w[ 8]; - u64x w9_t = w[ 9]; - u64x wa_t = w[10]; - u64x wb_t = w[11]; - u64x wc_t = w[12]; - u64x wd_t = w[13]; - u64x we_t = w[14]; - u64x wf_t = w[15]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w[ 0]; + u64 w1_t = w[ 1]; + u64 w2_t = w[ 2]; + u64 w3_t = w[ 3]; + u64 w4_t = w[ 4]; + u64 w5_t = w[ 5]; + u64 w6_t = w[ 6]; + u64 w7_t = w[ 7]; + u64 w8_t = w[ 8]; + u64 w9_t = w[ 9]; + u64 wa_t = w[10]; + u64 wb_t = w[11]; + u64 wc_t = w[12]; + u64 wd_t = w[13]; + u64 we_t = w[14]; + u64 wf_t = w[15]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -154,33 +142,33 @@ static void sha512_transform (const u64x w[16], u64x digest[8]) digest[7] += h; } -static void sha512_transform_workaround (const u64x w[16], u64x digest[8]) +static void sha512_transform_workaround (const u64 w[16], u64 digest[8]) { - u64x w0_t = w[ 0]; - u64x w1_t = w[ 1]; - u64x w2_t = w[ 2]; - u64x w3_t = w[ 3]; - u64x w4_t = w[ 4]; - u64x w5_t = w[ 5]; - u64x w6_t = w[ 6]; - u64x w7_t = w[ 7]; - u64x w8_t = w[ 8]; - u64x w9_t = w[ 9]; - u64x wa_t = w[10]; - u64x wb_t = w[11]; - u64x wc_t = w[12]; - u64x wd_t = w[13]; - u64x we_t = w[14]; - u64x wf_t = w[15]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = w[ 0]; + u64 w1_t = w[ 1]; + u64 w2_t = w[ 2]; + u64 w3_t = w[ 3]; + u64 w4_t = w[ 4]; + u64 w5_t = w[ 5]; + u64 w6_t = w[ 6]; + u64 w7_t = w[ 7]; + u64 w8_t = w[ 8]; + u64 w9_t = w[ 9]; + u64 wa_t = w[10]; + u64 wb_t = w[11]; + u64 wc_t = w[12]; + u64 wd_t = w[13]; + u64 we_t = w[14]; + u64 wf_t = 
w[15]; + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND_WO() \ { \ @@ -233,7 +221,7 @@ static void sha512_init (sha512_ctx_t *sha512_ctx) sha512_ctx->len = 0; } -static void sha512_update (sha512_ctx_t *sha512_ctx, const u64x *buf, int len) +static void sha512_update (sha512_ctx_t *sha512_ctx, const u64 *buf, int len) { int pos = sha512_ctx->len & 0x7f; @@ -314,7 +302,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -340,12 +328,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_init (__gl * buffers */ - u64x pw[2]; + u64 pw[2]; pw[0] = swap_workaround (hl32_to_64 (w0[1], w0[0])); pw[1] = swap_workaround (hl32_to_64 (w0[3], w0[2])); - u64x salt[2]; + u64 salt[2]; salt[0] = swap_workaround (hl32_to_64 (salt_buf[1], salt_buf[0])); salt[1] = swap_workaround (hl32_to_64 (salt_buf[3], salt_buf[2])); @@ -364,7 +352,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_init (__gl sha512_final (&sha512_ctx); - u64x tmp[8]; + u64 tmp[8]; tmp[0] = sha512_ctx.state[0]; tmp[1] = sha512_ctx.state[1]; @@ -443,14 +431,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_loop (__gl if (gid >= gid_max) return; - u64x l_p_bytes0[2]; + u64 l_p_bytes0[2]; l_p_bytes0[0] = tmps[gid].l_p_bytes[0]; l_p_bytes0[1] = tmps[gid].l_p_bytes[1]; const u32 pw_len = pws[gid].pw_len; - u64x l_s_bytes0[2]; + u64 l_s_bytes0[2]; l_s_bytes0[0] = tmps[gid].l_s_bytes[0]; l_s_bytes0[1] = tmps[gid].l_s_bytes[1]; @@ -523,7 +511,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_loop (__gl * base */ - u64x l_alt_result[8]; + u64 l_alt_result[8]; l_alt_result[0] = tmps[gid].l_alt_result[0]; l_alt_result[1] = tmps[gid].l_alt_result[1]; @@ 
-621,15 +609,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m01800_comp (__gl const u32 lid = get_local_id (0); - const u64x a = swap_workaround (tmps[gid].l_alt_result[0]); - const u64x b = swap_workaround (tmps[gid].l_alt_result[1]); + const u64 a = swap_workaround (tmps[gid].l_alt_result[0]); + const u64 b = swap_workaround (tmps[gid].l_alt_result[1]); - const u32x r0 = l32_from_64 (a); - const u32x r1 = h32_from_64 (a); - const u32x r2 = l32_from_64 (b); - const u32x r3 = h32_from_64 (b); + const u32 r0 = l32_from_64 (a); + const u32 r1 = h32_from_64 (a); + const u32 r2 = l32_from_64 (b); + const u32 r3 = h32_from_64 (b); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m02100.cl b/OpenCL/m02100.cl similarity index 92% rename from amd/m02100.cl rename to OpenCL/m02100.cl index 850817a..9e6df95 100644 --- a/amd/m02100.cl +++ b/OpenCL/m02100.cl @@ -8,41 +8,29 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; MD4_STEP 
(MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -101,30 +89,30 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -229,7 +217,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -282,7 +270,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static 
void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -328,28 +316,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -391,7 +379,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_init (__gl w3[2] = pw_len * 2 * 8; - u32x digest_md4[4]; + u32 digest_md4[4]; digest_md4[0] = MD4M_A; digest_md4[1] = MD4M_B; @@ -447,8 +435,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_init (__gl w3[2] = 0; w3[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -503,7 +491,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_init (__gl w3[0] = swap_workaround (w3[0]); w3[1] = swap_workaround (w3[1]); - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest); @@ -529,8 +517,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -548,8 +536,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_loop (__gl * iter1 */ - u32x dgst[5]; - u32x out[4]; + u32 dgst[5]; + u32 out[4]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -564,10 +552,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_loop (__gl for (u32 i = 0; i < loop_cnt; i++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 
w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -618,12 +606,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02100_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m02400_a0.cl b/OpenCL/m02400_a0.cl similarity index 92% rename from amd/m02400_a0.cl rename to OpenCL/m02400_a0.cl index 30fabd2..73f555f 100644 --- a/amd/m02400_a0.cl +++ b/OpenCL/m02400_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,7 +38,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -77,28 +53,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -112,12 +88,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -192,12 +168,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const 
u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -225,7 +201,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -252,28 +228,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -287,12 +263,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -372,12 +348,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02400_a1.cl b/OpenCL/m02400_a1.cl similarity index 93% rename from amd/m02400_a1.cl rename to OpenCL/m02400_a1.cl index d70cd64..bba27b0 100644 --- a/amd/m02400_a1.cl +++ b/OpenCL/m02400_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define 
VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 
5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -138,28 +114,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -171,12 +147,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -251,12 +227,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -284,28 +260,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + 
u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -374,28 +350,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -407,12 +383,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -492,12 +468,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02400_a3.cl b/OpenCL/m02400_a3.cl similarity index 67% rename from amd/m02400_a3.cl rename to OpenCL/m02400_a3.cl index bb8bd6c..3fb7485 100644 --- a/amd/m02400_a3.cl +++ b/OpenCL/m02400_a3.cl @@ -4,48 +4,23 @@ */ 
#define _MD5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global 
void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -137,22 +112,20 @@ static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -227,16 +200,16 @@ static void m02400m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, 
__global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -340,22 +313,20 @@ static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; 
- u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -435,16 +406,16 @@ static void m02400s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, 
__global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -482,15 +453,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m04 (__glo m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m08 (__global pw_t 
*pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -528,10 +499,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s04 (__glo m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02400_s16 (__global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m02410_a0.cl b/OpenCL/m02410_a0.cl similarity index 93% rename from amd/m02410_a0.cl rename to OpenCL/m02410_a0.cl index edb4dff..835da39 100644 --- a/amd/m02410_a0.cl +++ b/OpenCL/m02410_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M 
"check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,7 +38,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -90,28 +66,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -166,12 +142,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP 
(MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -246,12 +222,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -279,7 +255,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; @@ -319,28 +295,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -395,12 +371,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -480,12 +456,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02410_a1.cl b/OpenCL/m02410_a1.cl similarity index 93% rename from amd/m02410_a1.cl rename to OpenCL/m02410_a1.cl index 4e8d369..8ab9307 
100644 --- a/amd/m02410_a1.cl +++ b/OpenCL/m02410_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo if (gid >= 
gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -187,28 +163,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -220,12 +196,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -300,12 +276,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -333,28 +309,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -472,28 +448,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0] | s0[0]; w0[1] = wordl0[1] | wordr0[1] | s0[1]; w0[2] = wordl0[2] | wordr0[2] | s0[2]; w0[3] = wordl0[3] | wordr0[3] | s0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -505,12 +481,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo w1[0] = 0x80; w3[2] = 16 * 8; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -590,12 +566,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include 
VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02410_a3.cl b/OpenCL/m02410_a3.cl similarity index 69% rename from amd/m02410_a3.cl rename to OpenCL/m02410_a3.cl index 10f8d68..fcef5de 100644 --- a/amd/m02410_a3.cl +++ b/OpenCL/m02410_a3.cl @@ -4,48 +4,23 @@ */ #define _MD5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const 
u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -182,22 +157,20 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -272,16 +245,16 @@ static void m02410m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include 
VECT_COMPARE_M + #include COMPARE_M } } -static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -430,22 +403,20 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for 
(u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -525,16 +496,16 @@ static void m02410s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c &= 0x00ffffff; b &= 0x00ffffff; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, 
__global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -572,15 +543,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m04 (__glo m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 
(__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -618,10 +589,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s04 (__glo m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, 
__global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02410_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m02500.cl b/OpenCL/m02500.cl similarity index 91% rename from amd/m02500.cl rename to OpenCL/m02500.cl index 724f0e1..d5c86df 100644 --- a/amd/m02500.cl +++ b/OpenCL/m02500.cl @@ -8,64 +8,52 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 
-#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -141,7 +129,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -192,7 +180,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], 
u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -226,30 +214,30 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -354,7 +342,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -407,7 +395,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], 
u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -453,28 +441,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -520,8 +508,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -580,7 +568,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[5]; + u32 dgst[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -604,8 +592,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -621,8 +609,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_loop (__gl for (u32 i = 0; i < 8; i += 5) { - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[i + 0]; dgst[1] = tmps[gid].dgst[i + 1]; @@ -638,10 +626,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 
w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -691,10 +679,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl const u32 lid = get_local_id (0); - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = tmps[gid].out[0]; w0[1] = tmps[gid].out[1]; @@ -713,8 +701,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl w3[2] = 0; w3[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -754,7 +742,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl w3[2] = 0; w3[3] = (64 + 100) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest); @@ -822,7 +810,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl w3[2] = (64 + eapol_size) * 8; w3[3] = 0; - u32x digest1[4]; + u32 digest1[4]; hmac_md5_run (w0, w1, w2, w3, ipad, opad, digest1); @@ -832,12 +820,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl #define il_pos 0 - const u32x r0 = digest1[DGST_R0]; - const u32x r1 = digest1[DGST_R1]; - const u32x r2 = digest1[DGST_R2]; - const u32x r3 = digest1[DGST_R3]; + const u32 r0 = digest1[DGST_R0]; + const u32 r1 = digest1[DGST_R1]; + const u32 r2 = digest1[DGST_R2]; + const u32 r3 = digest1[DGST_R3]; - #include VECT_COMPARE_M + #include COMPARE_M } { @@ -904,7 +892,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl w3[2] = 0; w3[3] = (64 + eapol_size) * 8; - u32x digest2[5]; + u32 digest2[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest2); @@ -914,11 +902,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02500_comp (__gl #define il_pos 0 - const u32x r0 = digest2[DGST_R0]; - const u32x r1 = digest2[DGST_R1]; - const u32x r2 = digest2[DGST_R2]; - const u32x r3 = digest2[DGST_R3]; + const u32 r0 = 
digest2[DGST_R0]; + const u32 r1 = digest2[DGST_R1]; + const u32 r2 = digest2[DGST_R2]; + const u32 r3 = digest2[DGST_R3]; - #include VECT_COMPARE_M + #include COMPARE_M } } diff --git a/amd/m02610_a0.cl b/OpenCL/m02610_a0.cl similarity index 92% rename from amd/m02610_a0.cl rename to OpenCL/m02610_a0.cl index 8a12831..d1d6da4 100644 --- a/amd/m02610_a0.cl +++ b/OpenCL/m02610_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -146,28 +122,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = 
pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -180,10 +156,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -258,21 +234,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = 
uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -357,12 +333,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -388,14 +364,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -474,28 +450,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -508,10 +484,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -586,21 +562,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t 
= uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -685,12 +661,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02610_a1.cl b/OpenCL/m02610_a1.cl similarity index 93% rename from amd/m02610_a1.cl rename to OpenCL/m02610_a1.cl index 78549d7..9f334a1 100644 --- a/amd/m02610_a1.cl +++ b/OpenCL/m02610_a1.cl @@ -8,41 +8,17 @@ 
#include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -202,38 +178,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] 
| wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -308,21 +284,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 
255) << 16; const u32 w8_t = s[0]; @@ -407,12 +383,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -438,28 +414,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -582,38 +558,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); 
MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -688,21 +664,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -787,12 +763,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02610_a3.cl 
b/OpenCL/m02610_a3.cl similarity index 89% rename from amd/m02610_a3.cl rename to OpenCL/m02610_a3.cl index 7e3e8e3..68d012d 100644 --- a/amd/m02610_a3.cl +++ b/OpenCL/m02610_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02610m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -86,7 +62,7 @@ static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -94,10 +70,10 @@ static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -172,21 +148,21 @@ static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 
((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -271,16 +247,16 @@ static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02610s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -322,7 +298,7 @@ static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -330,10 +306,10 @@ static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP 
(MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -408,21 +384,21 @@ static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -507,12 +483,12 @@ static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + 
#include COMPARE_S } } @@ -531,28 +507,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -617,28 +593,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -703,28 +679,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -789,28 +765,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; 
w3[1] = 0; @@ -875,28 +851,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -961,28 +937,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02610_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m02710_a0.cl b/OpenCL/m02710_a0.cl similarity index 93% rename from amd/m02710_a0.cl rename to OpenCL/m02710_a0.cl index 290f5d9..c603a41 100644 --- a/amd/m02710_a0.cl +++ b/OpenCL/m02710_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -148,28 +124,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -182,10 +158,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -260,21 +236,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) 
& 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -359,10 +335,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -442,12 +418,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -473,14 +449,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo 
const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -561,28 +537,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -595,10 +571,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -673,21 +649,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 
0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -772,10 +748,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -858,12 +834,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02710_a1.cl b/OpenCL/m02710_a1.cl similarity index 93% rename from amd/m02710_a1.cl rename to OpenCL/m02710_a1.cl index 6b58737..02f7e60 100644 --- a/amd/m02710_a1.cl +++ b/OpenCL/m02710_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" 
-#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -74,28 +50,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -208,38 +184,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - 
u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -314,21 +290,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -413,10 +389,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - 
const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -496,12 +472,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -527,28 +503,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -673,38 +649,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = 
MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -779,21 +755,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -878,10 +854,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const 
u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -964,12 +940,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02710_a3.cl b/OpenCL/m02710_a3.cl similarity index 90% rename from amd/m02710_a3.cl rename to OpenCL/m02710_a3.cl index 1505030..c2badb5 100644 --- a/amd/m02710_a3.cl +++ b/OpenCL/m02710_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global 
u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -88,7 +64,7 @@ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -96,10 +72,10 @@ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = 
MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -174,21 +150,21 @@ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -273,10 +249,10 @@ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d 
= d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -356,16 +332,16 @@ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global 
salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -409,7 +385,7 @@ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -417,10 +393,10 @@ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -495,21 +471,21 @@ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 
16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -594,10 +570,10 @@ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -680,12 +656,12 @@ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -704,28 +680,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -790,28 +766,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = 
pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -876,28 +852,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -962,28 +938,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1048,28 +1024,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1134,28 +1110,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02710_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m02810_a0.cl b/OpenCL/m02810_a0.cl similarity index 93% rename from amd/m02810_a0.cl rename to OpenCL/m02810_a0.cl index db09766..ab9f643 100644 --- a/amd/m02810_a0.cl +++ b/OpenCL/m02810_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -151,28 +127,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -185,10 +161,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -272,21 +248,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const 
u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -362,10 +338,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -445,12 +421,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -476,14 +452,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -563,28 +539,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 
w3[4]; w3[0] = 0; w3[1] = 0; @@ -597,10 +573,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -684,21 +660,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -774,10 +750,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m02810_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -860,12 +836,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02810_a1.cl b/OpenCL/m02810_a1.cl similarity index 93% rename from amd/m02810_a1.cl rename to OpenCL/m02810_a1.cl index c4fb50b..6471732 100644 --- a/amd/m02810_a1.cl +++ b/OpenCL/m02810_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 
(__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -203,38 +179,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -318,21 +294,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t 
= uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -408,10 +384,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -491,12 +467,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -522,28 +498,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = 
pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -667,38 +643,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -782,21 +758,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 
0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -872,10 +848,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -958,12 +934,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m02810_a3.cl b/OpenCL/m02810_a3.cl similarity index 90% rename from amd/m02810_a3.cl rename to OpenCL/m02810_a3.cl index 309b271..8450648 100644 --- a/amd/m02810_a3.cl +++ b/OpenCL/m02810_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include 
"include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02810m (u32 
w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -87,7 +63,7 @@ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -95,10 +71,10 @@ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -182,21 +158,21 @@ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = 
uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -272,10 +248,10 @@ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -355,16 +331,16 @@ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, 
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m02810s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -407,7 +383,7 @@ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -415,10 +391,10 @@ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = 
MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -502,21 +478,21 @@ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p const u32 w6_t = s[6]; const u32 w7_t = s[7]; - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; a = MD5M_A; @@ -592,10 +568,10 @@ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r_a = a + MD5M_A; - const u32x 
r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; + const u32 r_a = a + MD5M_A; + const u32 r_b = b + MD5M_B; + const u32 r_c = c + MD5M_C; + const u32 r_d = d + MD5M_D; a = r_a; b = r_b; @@ -678,12 +654,12 @@ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -702,28 +678,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -788,28 +764,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -874,28 +850,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = 
pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -960,28 +936,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1046,28 +1022,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1132,28 +1108,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m02810_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m03000_a0.cl b/OpenCL/m03000_a0.cl similarity index 92% rename from amd/m03000_a0.cl rename to OpenCL/m03000_a0.cl index 648128f..16789f8 100644 --- a/amd/m03000_a0.cl +++ b/OpenCL/m03000_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define 
VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -353,27 +329,27 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define LM_IV_1_IP_RR3 0xaa190747 #ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) +#define BOX(i,n,S) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); @@ -406,9 +382,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x 
data[2], u32x Kc[16], u32x Kd[1 iv[1] = rotl32 (r, 29); } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -433,13 +409,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -448,12 +424,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -465,7 +441,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) +static void transform_netntlmv1_key 
(const u32 w0, const u32 w1, u32 out[2]) { #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); @@ -578,7 +554,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf[4]; + u32 pw_buf[4]; pw_buf[0] = pws[gid].i[ 0]; pw_buf[1] = pws[gid].i[ 1]; @@ -622,28 +598,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf[0]; w0[1] = pw_buf[1]; w0[2] = pw_buf[2]; w0[3] = pw_buf[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -654,33 +630,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__glo out_len = (out_len >= 7) ? 7 : out_len; - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = LM_IV_0_IP_RR3; data[1] = LM_IV_1_IP_RR3; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -706,7 +682,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf[4]; + u32 pw_buf[4]; pw_buf[0] = pws[gid].i[ 0]; pw_buf[1] = pws[gid].i[ 1]; @@ -762,28 +738,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = 
pw_buf[0]; w0[1] = pw_buf[1]; w0[2] = pw_buf[2]; w0[3] = pw_buf[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -794,33 +770,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_s04 (__glo out_len = (out_len >= 7) ? 7 : out_len; - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = LM_IV_0_IP_RR3; data[1] = LM_IV_1_IP_RR3; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03000_a1.cl b/OpenCL/m03000_a1.cl similarity index 92% rename from amd/m03000_a1.cl rename to OpenCL/m03000_a1.cl index 6873d94..8ad0cd8 100644 --- a/amd/m03000_a1.cl +++ b/OpenCL/m03000_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -351,27 +327,27 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define LM_IV_1_IP_RR3 0xaa190747 #ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) +#define BOX(i,n,S) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); @@ -404,9 +380,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = rotl32 (r, 29); } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -431,13 +407,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = 
(c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -446,12 +422,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -463,7 +439,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) +static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); @@ -576,28 +552,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -686,61 +662,61 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = LM_IV_0_IP_RR3; data[1] = LM_IV_1_IP_RR3; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -766,28 +742,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -888,61 +864,61 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; 
- u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = LM_IV_0_IP_RR3; data[1] = LM_IV_1_IP_RR3; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03000_a3.cl b/OpenCL/m03000_a3.cl similarity index 98% rename from amd/m03000_a3.cl rename to OpenCL/m03000_a3.cl index df44723..392d612 100644 --- a/amd/m03000_a3.cl +++ b/OpenCL/m03000_a3.cl @@ -5,45 +5,32 @@ */ #define _DES_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect1_comp4_bs.c" +#define COMPARE_M "check_multi_vect1_comp4_bs.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect2_comp4_bs.c" +#define COMPARE_M "check_multi_vect2_comp4_bs.c" #endif #ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c" +#define COMPARE_S "check_single_vect4_comp4_bs.c" +#define COMPARE_M "check_multi_vect4_comp4_bs.c" #endif #define KXX_DECL @@ -817,7 +804,7 @@ static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * const u32 K54 = pws[gid].i[54]; const u32 K55 = pws[gid].i[55]; - const u32 bf_loops = bfs_cnt; + const u32 bfs_cnt = bfs_cnt; const u32 pc_pos = get_local_id (1); @@ -1072,12 +1059,12 @@ static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * const u32 slice = 31 - clz (~tmpResult); - const u32x r0 = search[0]; - const u32x r1 = search[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = search[0]; + const u32 r1 = search[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } else @@ -1098,12 +1085,12 @@ static void m03000m (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * #pragma unroll for (int slice = 0; slice < 32; slice++) { - const u32x r0 = out0[31 - slice]; - const u32x r1 = out1[31 - slice]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = out0[31 - slice]; + const u32 r1 = out1[31 - slice]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -1484,7 +1471,7 @@ static void m03000s (__local u32 *s_S, __global pw_t *pws, __global gpu_rule_t * const u32 slice = 31 - clz (~tmpResult); - #include VECT_COMPARE_S + #include COMPARE_S } // diff --git a/amd/m03100_a0.cl b/OpenCL/m03100_a0.cl similarity index 94% rename from amd/m03100_a0.cl rename to OpenCL/m03100_a0.cl index edad8ee..d04183e 100644 --- a/amd/m03100_a0.cl +++ b/OpenCL/m03100_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 
#define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -368,23 +344,23 @@ __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; #ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) +#define BOX(i,n,S) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; IP (r, l, tt); @@ -394,8 +370,8 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ r; t = Kd[i + 0] ^ rotl32 (r, 28u); 
@@ -433,9 +409,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -460,7 +436,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) + u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb) | BOX ((((c >> 6) & 0x03) | ((c >> 7) & 0x3c)), 1, s_skb) | BOX ((((c >> 13) & 0x0f) @@ -469,7 +445,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc | ((c >> 21) & 0x06) | ((c >> 22) & 0x38)), 3, s_skb); - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) + u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb) | BOX ((((d >> 7) & 0x03) | ((d >> 8) & 0x3c)), 5, s_skb) | BOX ((((d >> 15) & 0x3f)), 6, s_skb) @@ -503,14 +479,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -574,28 +550,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -610,28 +586,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * prepend salt */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -649,7 +625,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo w1_t[2] |= salt_buf1[2]; w1_t[3] |= salt_buf1[3]; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -673,7 +649,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * plus LEFT_ROTATE by 2 */ - u32x Kc[16]; + u32 Kc[16]; Kc[ 0] = 0x64649040; Kc[ 1] = 0x14909858; @@ -692,7 +668,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -715,14 +691,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -744,7 +720,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -759,12 +735,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 
0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -790,14 +766,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -873,28 +849,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -909,28 +885,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * prepend salt */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -948,7 +924,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo w1_t[2] |= salt_buf1[2]; w1_t[3] |= salt_buf1[3]; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -972,7 +948,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * plus LEFT_ROTATE by 2 */ - u32x Kc[16]; + u32 Kc[16]; Kc[ 0] = 0x64649040; Kc[ 1] = 0x14909858; @@ -991,7 +967,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -1014,14 +990,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1043,7 +1019,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1058,12 +1034,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03100_a1.cl b/OpenCL/m03100_a1.cl similarity index 94% rename from amd/m03100_a1.cl rename to OpenCL/m03100_a1.cl index 5793a02..63f8e98 100644 --- a/amd/m03100_a1.cl +++ b/OpenCL/m03100_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -366,23 +342,23 @@ __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; #ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) +#define BOX(i,n,S) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; IP (r, l, tt); @@ -392,8 +368,8 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ r; t = Kd[i + 0] ^ rotl32 (r, 28u); @@ -431,9 +407,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + 
u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -458,7 +434,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) + u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb) | BOX ((((c >> 6) & 0x03) | ((c >> 7) & 0x3c)), 1, s_skb) | BOX ((((c >> 13) & 0x0f) @@ -467,7 +443,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc | ((c >> 21) & 0x06) | ((c >> 22) & 0x38)), 3, s_skb); - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) + u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb) | BOX ((((d >> 7) & 0x03) | ((d >> 8) & 0x3c)), 5, s_skb) | BOX ((((d >> 15) & 0x3f)), 6, s_skb) @@ -501,28 +477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -631,28 +607,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] 
| wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -663,10 +639,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -696,7 +672,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo w1_t[2] |= salt_buf1[2]; w1_t[3] |= salt_buf1[3]; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -720,7 +696,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * plus LEFT_ROTATE by 2 */ - u32x Kc[16]; + u32 Kc[16]; Kc[ 0] = 0x64649040; Kc[ 1] = 0x14909858; @@ -739,7 +715,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -762,14 +738,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -791,7 +767,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -806,12 +782,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include 
VECT_COMPARE_M + #include COMPARE_M } } @@ -837,28 +813,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -979,28 +955,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -1011,10 +987,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -1044,7 +1020,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo w1_t[2] |= salt_buf1[2]; w1_t[3] |= salt_buf1[3]; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -1068,7 +1044,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * plus LEFT_ROTATE by 2 */ - u32x Kc[16]; + u32 
Kc[16]; Kc[ 0] = 0x64649040; Kc[ 1] = 0x14909858; @@ -1087,7 +1063,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -1110,14 +1086,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1139,7 +1115,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1154,12 +1130,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03100_a3.cl b/OpenCL/m03100_a3.cl similarity index 82% rename from amd/m03100_a3.cl rename to OpenCL/m03100_a3.cl index e4b0689..3941078 100644 --- a/amd/m03100_a3.cl +++ b/OpenCL/m03100_a3.cl @@ -4,46 +4,21 @@ */ #define _DES_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" 
+#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -367,23 +342,23 @@ __constant u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; #ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) +#define BOX(i,n,S) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(i,n,S) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#define BOX(i,n,S) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; IP (r, l, tt); @@ -393,8 +368,8 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ r; t = Kd[i + 0] ^ rotl32 (r, 28u); @@ -432,9 +407,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x 
Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -459,7 +434,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) + u32 s = BOX ((( c >> 0) & 0x3f), 0, s_skb) | BOX ((((c >> 6) & 0x03) | ((c >> 7) & 0x3c)), 1, s_skb) | BOX ((((c >> 13) & 0x0f) @@ -468,7 +443,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc | ((c >> 21) & 0x06) | ((c >> 22) & 0x38)), 3, s_skb); - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) + u32 t = BOX ((( d >> 0) & 0x3f), 4, s_skb) | BOX ((((d >> 7) & 0x03) | ((d >> 8) & 0x3c)), 5, s_skb) | BOX ((((d >> 15) & 0x3f)), 6, s_skb) @@ -488,7 +463,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) +static void overwrite_at (u32 sw[16], const u32 w0, const u32 salt_len) { #if defined cl_amd_media_ops switch (salt_len) @@ -677,7 +652,7 @@ static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) #endif } -static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -760,7 +735,7 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w3_t[2] = 0; w3_t[3] = 0; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -783,15 +758,13 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; overwrite_at (dst, w0, salt_len); @@ -800,7 +773,7 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * plus LEFT_ROTATE by 2 */ - u32x Kc[16]; + u32 Kc[16]; Kc[ 0] = 
0x64649040; Kc[ 1] = 0x14909858; @@ -819,7 +792,7 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -842,14 +815,14 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -871,7 +844,7 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -886,16 +859,16 @@ static void m03100m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global 
u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -978,7 +951,7 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w3_t[2] = 0; w3_t[3] = 0; - u32x dst[16]; + u32 dst[16]; dst[ 0] = w0_t[0]; dst[ 1] = w0_t[1]; @@ -1013,15 +986,13 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; overwrite_at (dst, w0, salt_len); @@ -1030,7 +1001,7 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * plus LEFT_ROTATE by 2 */ - 
u32x Kc[16]; + u32 Kc[16]; Kc[ 0] = 0x64649040; Kc[ 1] = 0x14909858; @@ -1049,7 +1020,7 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 Kc[14] = 0x584020b4; Kc[15] = 0x00742c4c; - u32x Kd[16]; + u32 Kd[16]; Kd[ 0] = 0xa42ce40c; Kd[ 1] = 0x64689858; @@ -1072,14 +1043,14 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * key1 (generate key) */ - u32x iv[2]; + u32 iv[2]; iv[0] = 0; iv[1] = 0; for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1101,7 +1072,7 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) { - u32x data[2]; + u32 data[2]; data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); @@ -1116,16 +1087,16 @@ static void m03100s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * cmp */ - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 s_SPtrans[8][64]; @@ -1192,7 +1163,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m04 (__glo m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, 
__global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 s_SPtrans[8][64]; @@ -1259,11 +1230,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m08 (__glo m03100m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 
s_SPtrans[8][64]; @@ -1330,7 +1301,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s04 (__glo m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t 
*plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 s_SPtrans[8][64]; @@ -1397,6 +1368,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s08 (__glo m03100s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m03100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m03200.cl b/OpenCL/m03200.cl similarity index 95% rename from amd/m03200.cl rename to OpenCL/m03200.cl index 77cda5e..8d5681a 100644 --- a/amd/m03200.cl +++ b/OpenCL/m03200.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif // http://www.schneier.com/code/constants.txt @@ -320,7 +308,7 @@ __constant u32 c_pbox[18] = { \ uchar4 c = as_uchar4 ((L)); \ \ - u32x tmp; \ + u32 tmp; \ \ tmp = S0[c.s3]; \ tmp += S1[c.s2]; \ @@ -351,7 +339,7 @@ __constant u32 c_pbox[18] = BF_ROUND (L, R, 15); \ BF_ROUND (R, L, 16); \ \ - u32x tmp; \ + u32 tmp; \ \ tmp = R; \ R = L; \ @@ -360,7 +348,7 @@ __constant u32 c_pbox[18] = L ^= 
P[17]; \ } -static void expand_key (u32x E[34], const u32x W[16], const u32 len) +static void expand_key (u32 E[34], const u32 W[16], const u32 len) { u8 *E_cur = (u8 *) E; u8 *E_stop = E_cur + 72; @@ -393,7 +381,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_init (__glo const u32 pw_len = pws[gid].pw_len; - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -412,7 +400,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_init (__glo w[14] = pws[gid].i[14]; w[15] = pws[gid].i[15]; - u32x E[34]; + u32 E[34]; expand_key (E, w, pw_len); @@ -450,19 +438,19 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_init (__glo * do the key setup */ - __local u32x S0_all[8][256]; - __local u32x S1_all[8][256]; - __local u32x S2_all[8][256]; - __local u32x S3_all[8][256]; + __local u32 S0_all[8][256]; + __local u32 S1_all[8][256]; + __local u32 S2_all[8][256]; + __local u32 S3_all[8][256]; - __local u32x *S0 = S0_all[lid]; - __local u32x *S1 = S1_all[lid]; - __local u32x *S2 = S2_all[lid]; - __local u32x *S3 = S3_all[lid]; + __local u32 *S0 = S0_all[lid]; + __local u32 *S1 = S1_all[lid]; + __local u32 *S2 = S2_all[lid]; + __local u32 *S3 = S3_all[lid]; // initstate - u32x P[18]; + u32 P[18]; for (u32 i = 0; i < 18; i++) { @@ -604,7 +592,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo const u32 pw_len = pws[gid].pw_len; - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -623,7 +611,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo w[14] = pws[gid].i[14]; w[15] = pws[gid].i[15]; - u32x E[34]; + u32 E[34]; expand_key (E, w, pw_len); @@ -648,22 +636,22 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_loop (__glo // load - u32x P[18]; + u32 P[18]; for (u32 i = 0; i < 18; i++) { P[i] = tmps[gid].P[i]; } - __local u32x S0_all[8][256]; - __local u32x S1_all[8][256]; - 
__local u32x S2_all[8][256]; - __local u32x S3_all[8][256]; + __local u32 S0_all[8][256]; + __local u32 S1_all[8][256]; + __local u32 S2_all[8][256]; + __local u32 S3_all[8][256]; - __local u32x *S0 = S0_all[lid]; - __local u32x *S1 = S1_all[lid]; - __local u32x *S2 = S2_all[lid]; - __local u32x *S3 = S3_all[lid]; + __local u32 *S0 = S0_all[lid]; + __local u32 *S1 = S1_all[lid]; + __local u32 *S2 = S2_all[lid]; + __local u32 *S3 = S3_all[lid]; for (u32 i = 0; i < 256; i++) { @@ -834,22 +822,22 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__glo // load - u32x P[18]; + u32 P[18]; for (u32 i = 0; i < 18; i++) { P[i] = tmps[gid].P[i]; } - __local u32x S0_all[8][256]; - __local u32x S1_all[8][256]; - __local u32x S2_all[8][256]; - __local u32x S3_all[8][256]; + __local u32 S0_all[8][256]; + __local u32 S1_all[8][256]; + __local u32 S2_all[8][256]; + __local u32 S3_all[8][256]; - __local u32x *S0 = S0_all[lid]; - __local u32x *S1 = S1_all[lid]; - __local u32x *S2 = S2_all[lid]; - __local u32x *S3 = S3_all[lid]; + __local u32 *S0 = S0_all[lid]; + __local u32 *S1 = S1_all[lid]; + __local u32 *S2 = S2_all[lid]; + __local u32 *S3 = S3_all[lid]; for (u32 i = 0; i < 256; i++) { @@ -874,8 +862,8 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__glo BF_ENCRYPT (L0, R0); } - const u32x r0 = L0; - const u32x r1 = R0; + const u32 r0 = L0; + const u32 r1 = R0; L0 = BCRYPTM_2; R0 = BCRYPTM_3; @@ -885,8 +873,8 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__glo BF_ENCRYPT (L0, R0); } - const u32x r2 = L0; - const u32x r3 = R0; + const u32 r2 = L0; + const u32 r3 = R0; /* e = L0; @@ -897,5 +885,5 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m03200_comp (__glo #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m03710_a0.cl b/OpenCL/m03710_a0.cl similarity index 95% rename from amd/m03710_a0.cl rename to OpenCL/m03710_a0.cl index 
142a953..9a99001 100644 --- a/amd/m03710_a0.cl +++ b/OpenCL/m03710_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -165,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -199,10 +175,10 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -277,10 +253,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -411,12 +387,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -442,14 +418,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -547,28 +523,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] 
= 0; w3[1] = 0; @@ -581,10 +557,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -659,10 +635,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -793,12 +769,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03710_a1.cl b/OpenCL/m03710_a1.cl similarity index 96% rename from amd/m03710_a1.cl rename to OpenCL/m03710_a1.cl index 880d154..c475b8c 100644 --- a/amd/m03710_a1.cl +++ b/OpenCL/m03710_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -221,38 +197,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -327,10 +303,10 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -461,12 +437,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -492,28 +468,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -655,38 +631,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = 
wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -761,10 +737,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -895,12 +871,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03710_a3.cl b/OpenCL/m03710_a3.cl similarity index 92% rename from amd/m03710_a3.cl rename to OpenCL/m03710_a3.cl index 75c6650..054a591 100644 --- a/amd/m03710_a3.cl +++ b/OpenCL/m03710_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m03710m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -105,7 +81,7 @@ static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -113,10 +89,10 @@ static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -191,10 +167,10 @@ static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -325,16 +301,16 @@ static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 
*bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m03710s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -395,7 +371,7 @@ static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -403,10 +379,10 @@ static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + 
u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -481,10 +457,10 @@ static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; @@ -615,12 +591,12 @@ static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -638,28 +614,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -724,28 +700,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -810,28 +786,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_m16 (__glo const u32 lid = get_local_id (0); - 
u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -896,28 +872,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -982,28 +958,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1068,28 +1044,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03710_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git 
a/amd/m03800_a0.cl b/OpenCL/m03800_a0.cl similarity index 94% rename from amd/m03800_a0.cl rename to OpenCL/m03800_a0.cl index cc04da2..0eac70a 100644 --- a/amd/m03800_a0.cl +++ b/OpenCL/m03800_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,14 +36,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -151,28 +127,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -294,10 +270,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -368,12 +344,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo MD5_STEP 
(MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -399,14 +375,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -502,28 +478,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -532,28 +508,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -645,10 +621,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; 
MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -718,12 +694,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03800_a1.cl b/OpenCL/m03800_a1.cl similarity index 94% rename from amd/m03800_a1.cl rename to OpenCL/m03800_a1.cl index f47e271..9731b7b 100644 --- a/amd/m03800_a1.cl +++ b/OpenCL/m03800_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -58,28 +34,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -205,28 +181,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -237,28 +213,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 
(__glo * prepend salt */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -345,10 +321,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -419,12 +395,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -450,28 +426,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -609,28 +585,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] 
| wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -641,28 +617,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo * prepend salt */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -749,10 +725,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -822,12 +798,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m03800_a3.cl b/OpenCL/m03800_a3.cl similarity index 88% rename from amd/m03800_a3.cl rename to OpenCL/m03800_a3.cl index ce61421..9451f63 100644 --- a/amd/m03800_a3.cl +++ b/OpenCL/m03800_a3.cl @@ 
-8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m03800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void 
*tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -97,7 +73,7 @@ static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,28 +81,28 @@ static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -216,10 +192,10 @@ static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -290,16 +266,16 @@ static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - 
const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m03800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ 
-360,7 +336,7 @@ static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -368,28 +344,28 @@ static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -479,10 +455,10 @@ static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -552,12 +528,12 @@ static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -577,28 +553,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -630,28 +606,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m03800_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -683,28 +659,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -736,28 +712,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -789,28 +765,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 
0; @@ -842,28 +818,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m03800_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04310_a0.cl b/OpenCL/m04310_a0.cl similarity index 92% rename from amd/m04310_a0.cl rename to OpenCL/m04310_a0.cl index 40e0156..7ee75f1 100644 --- a/amd/m04310_a0.cl +++ b/OpenCL/m04310_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; 
pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -146,28 +122,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -180,10 +156,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -258,21 +234,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 
0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -357,12 +333,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -388,14 +364,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -474,28 +450,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -508,10 +484,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo w3[2] = out_len * 8; - 
u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -586,21 +562,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -685,12 +661,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - 
const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04310_a1.cl b/OpenCL/m04310_a1.cl similarity index 93% rename from amd/m04310_a1.cl rename to OpenCL/m04310_a1.cl index bae892c..4467889 100644 --- a/amd/m04310_a1.cl +++ b/OpenCL/m04310_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -202,38 +178,38 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -308,21 +284,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + 
const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -407,12 +383,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -438,28 +414,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -582,38 +558,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] 
| wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -688,21 +664,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -787,12 +763,12 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04310_a3.cl b/OpenCL/m04310_a3.cl similarity index 89% rename from amd/m04310_a3.cl rename to OpenCL/m04310_a3.cl index ce814c2..e98b9f2 100644 --- a/amd/m04310_a3.cl +++ b/OpenCL/m04310_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 
*bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04310m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -86,7 +62,7 @@ static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -94,10 +70,10 @@ static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = 
MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -172,21 +148,21 @@ static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = s[0]; @@ -271,16 +247,16 @@ static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + 
const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m04310s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04310s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -322,7 +298,7 @@ static void m04310s (u32x w0[4], u32x w1[4], u32x 
w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -330,10 +306,10 @@ static void m04310s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -408,21 +384,21 @@ static void m04310s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + const u32 w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 | uint_to_hex_lower8 ((d >> 24) & 255) << 16; const u32 w8_t = 
s[0]; @@ -507,12 +483,12 @@ static void m04310s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -531,28 +507,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -617,28 +593,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -703,28 +679,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -789,28 +765,28 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -875,28 +851,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -961,28 +937,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04310_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04400_a0.cl b/OpenCL/m04400_a0.cl similarity index 93% rename from amd/m04400_a0.cl rename to OpenCL/m04400_a0.cl index f8e9759..175ddc9 100644 --- a/amd/m04400_a0.cl +++ b/OpenCL/m04400_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define 
DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -129,28 +105,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -165,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = 
swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -395,12 +371,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -426,14 +402,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -495,28 +471,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -531,28 +507,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -761,12 +737,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04400_a1.cl b/OpenCL/m04400_a1.cl similarity index 93% rename from amd/m04400_a1.cl rename to OpenCL/m04400_a1.cl index df44095..4e2fd39 100644 --- a/amd/m04400_a1.cl +++ b/OpenCL/m04400_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - 
u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -187,28 +163,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -219,28 +195,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = 
swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -449,12 +425,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -480,28 +456,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -609,28 +585,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = 
wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -641,28 +617,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -871,12 +847,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include 
VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04400_a3.cl b/OpenCL/m04400_a3.cl similarity index 91% rename from amd/m04400_a3.cl rename to OpenCL/m04400_a3.cl index b14bac3..483fd34 100644 --- a/amd/m04400_a3.cl +++ b/OpenCL/m04400_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m04400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, 
__global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -73,7 +49,7 @@ static void m04400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -85,28 +61,28 @@ static void m04400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 
w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -315,16 +291,16 @@ static void m04400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m04400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -349,7 +325,7 @@ static void m04400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -361,28 +337,28 @@ static void m04400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -591,12 +567,12 @@ static void m04400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const 
u32 p MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -615,28 +591,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -701,28 +677,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -787,28 +763,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -873,28 +849,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s04 (__glo const u32 lid = get_local_id (0); 
- u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -959,28 +935,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1045,28 +1021,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04400_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04500_a0.cl b/OpenCL/m04500_a0.cl similarity index 93% rename from amd/m04500_a0.cl rename to OpenCL/m04500_a0.cl index d656901..f85c9da 100644 --- a/amd/m04500_a0.cl +++ b/OpenCL/m04500_a0.cl @@ -8,54 +8,30 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include 
"types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -129,28 +105,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -165,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround 
(w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -425,12 +401,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -456,14 +432,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -531,28 +507,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -567,28 +543,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround 
(w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -829,12 +805,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04500_a1.cl b/OpenCL/m04500_a1.cl similarity index 94% rename from amd/m04500_a1.cl rename to OpenCL/m04500_a1.cl index a60a70f..55f202a 100644 --- a/amd/m04500_a1.cl +++ b/OpenCL/m04500_a1.cl @@ 
-8,52 +8,28 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t 
*salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -187,28 +163,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -219,28 +195,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x 
w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -478,12 +454,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -509,28 +485,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; 
wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -644,28 +620,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -676,28 +652,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 
w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -938,12 +914,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04500_a3.cl b/OpenCL/m04500_a3.cl similarity index 92% rename from amd/m04500_a3.cl rename to OpenCL/m04500_a3.cl index a21f3f4..7a0d1c6 100644 --- a/amd/m04500_a3.cl +++ b/OpenCL/m04500_a3.cl @@ -8,55 +8,31 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif 
+#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m04500m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04500m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 
*bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -69,7 +45,7 @@ static void m04500m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -81,28 +57,28 @@ static void m04500m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -340,16 +316,16 @@ static void m04500m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const 
u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m04500s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04500s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 
l_bin2asc[256]) { /** * modifier @@ -380,7 +356,7 @@ static void m04500s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -392,28 +368,28 @@ static void m04500s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -654,12 +630,12 @@ static void m04500s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -678,28 +654,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x 
w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -764,28 +740,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -850,28 +826,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -936,28 +912,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1022,28 +998,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 
w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1108,28 +1084,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04500_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04700_a0.cl b/OpenCL/m04700_a0.cl similarity index 92% rename from amd/m04700_a0.cl rename to OpenCL/m04700_a0.cl index 23743db..57daee0 100644 --- a/amd/m04700_a0.cl +++ b/OpenCL/m04700_a0.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 @@ -27,36 +15,24 @@ #include "include/kernel_functions.c" #undef _MD5_ -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" 
#ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -73,14 +49,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -130,28 +106,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; 
w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -166,10 +142,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -248,33 +224,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + 
u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -378,12 +354,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -409,14 +385,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -484,28 +460,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -520,10 +496,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = 
MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -602,33 +578,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -735,12 +711,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04700_a1.cl b/OpenCL/m04700_a1.cl similarity index 92% rename from amd/m04700_a1.cl rename to OpenCL/m04700_a1.cl index ff3133c..b6996d5 100644 --- a/amd/m04700_a1.cl +++ b/OpenCL/m04700_a1.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 @@ -27,34 +15,22 @@ #include "include/kernel_functions.c" #undef _MD5_ -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define 
uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -71,28 +47,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -188,28 +164,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | 
wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -220,10 +196,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -302,33 +278,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = 
uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -432,12 +408,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -463,28 +439,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -598,28 +574,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] 
= wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -630,10 +606,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -712,33 +688,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + u32 w7_t = 
uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -845,12 +821,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04700_a3.cl b/OpenCL/m04700_a3.cl similarity index 90% rename from amd/m04700_a3.cl rename to OpenCL/m04700_a3.cl index 0c5628c..e011d2e 100644 --- a/amd/m04700_a3.cl +++ b/OpenCL/m04700_a3.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 @@ -27,37 +15,25 @@ #include "include/kernel_functions.c" #undef _MD5_ -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S 
"check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t 
*plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -70,7 +46,7 @@ static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -82,10 +58,10 @@ static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -164,33 +140,33 @@ static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t 
= uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -294,16 +270,16 @@ static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m04700s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -334,7 +310,7 @@ static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -346,10 +322,10 @@ static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -428,33 +404,33 @@ static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 + u32 w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = 
uint_to_hex_lower8_le ((a >> 24) & 255) << 0 + u32 w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 + u32 w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 + u32 w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 + u32 w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 + u32 w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 + u32 w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 + u32 w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; + u32 w8_t = 0x80000000; + u32 w9_t = 0; + u32 wa_t = 0; + u32 wb_t = 0; + u32 wc_t = 0; + u32 wd_t = 0; + u32 we_t = 0; + u32 wf_t = 32 * 8; - u32x e; + u32 e; a = SHA1M_A; b = SHA1M_B; @@ -561,12 +537,12 @@ static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -585,28 +561,28 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -671,28 +647,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -757,28 +733,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -843,28 +819,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -929,28 +905,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m04700_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1015,28 +991,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04700_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04800_a0.cl b/OpenCL/m04800_a0.cl similarity index 93% rename from amd/m04800_a0.cl rename to OpenCL/m04800_a0.cl index 5b9b407..0e60511 100644 --- a/amd/m04800_a0.cl +++ b/OpenCL/m04800_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif 
+#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -98,28 +74,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 
w3[4]; w3[0] = 0; w3[1] = 0; @@ -198,10 +174,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -271,12 +247,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -304,14 +280,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -352,28 +328,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -452,10 +428,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, 
MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -530,12 +506,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04800_a1.cl b/OpenCL/m04800_a1.cl similarity index 94% rename from amd/m04800_a1.cl rename to OpenCL/m04800_a1.cl index e5d9877..5c5b5e2 100644 --- a/amd/m04800_a1.cl +++ b/OpenCL/m04800_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -152,28 +128,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -245,10 +221,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - 
u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -318,12 +294,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -351,28 +327,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -455,28 +431,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -548,10 +524,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 
1, 1))) m04800_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -626,12 +602,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04800_a3.cl b/OpenCL/m04800_a3.cl similarity index 87% rename from amd/m04800_a3.cl rename to OpenCL/m04800_a3.cl index 4e1ddd9..1e6cf74 100644 --- a/amd/m04800_a3.cl +++ b/OpenCL/m04800_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, 
__global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m04800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -99,28 +75,28 @@ static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // move w by 1 - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] 
= 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -161,8 +137,8 @@ static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0_t[0]; - u32x w1l = w0_t[1]; + u32 w0l = w0_t[0]; + u32 w1l = w0_t[1]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -175,10 +151,10 @@ static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -248,16 +224,16 @@ static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset) +static void m04800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -312,28 +288,28 @@ static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // move w by 1 - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -386,8 +362,8 @@ static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0_t[0]; - u32x w1l = w0_t[1]; + u32 w0l = w0_t[0]; + u32 w1l = w0_t[1]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -400,10 +376,10 @@ static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, 
w0_t[1], MD5C01, MD5S01); @@ -478,12 +454,12 @@ static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -504,28 +480,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -557,28 +533,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -610,28 +586,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ 
-663,28 +639,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -716,28 +692,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -769,28 +745,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04800_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m04900_a0.cl b/OpenCL/m04900_a0.cl similarity index 90% rename from amd/m04900_a0.cl rename to OpenCL/m04900_a0.cl index f0b63bc..e73a36b 100644 --- a/amd/m04900_a0.cl +++ b/OpenCL/m04900_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 
#define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; 
pw_buf1[1] = pws[gid].i[ 5]; @@ -118,28 +94,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = pw_buf0[0]; w0_t[1] = pw_buf0[1]; w0_t[2] = pw_buf0[2]; w0_t[3] = pw_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = pw_buf1[0]; w1_t[1] = pw_buf1[1]; w1_t[2] = pw_buf1[2]; w1_t[3] = pw_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -226,32 +202,32 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = 0; + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + 
u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -349,12 +325,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -382,14 +358,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -456,28 +432,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = pw_buf0[0]; w0_t[1] = pw_buf0[1]; w0_t[2] = pw_buf0[2]; w0_t[3] = pw_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = pw_buf1[0]; w1_t[1] = pw_buf1[1]; w1_t[2] = pw_buf1[2]; w1_t[3] = pw_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -564,32 +540,32 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = 
swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = 0; + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -690,12 +666,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04900_a1.cl b/OpenCL/m04900_a1.cl similarity index 91% rename from amd/m04900_a1.cl rename to OpenCL/m04900_a1.cl index 0334f3e..32358da 100644 --- a/amd/m04900_a1.cl +++ b/OpenCL/m04900_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 
#define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] 
= pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -172,28 +148,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = wordl0[0] | wordr0[0]; w0_t[1] = wordl0[1] | wordr0[1]; w0_t[2] = wordl0[2] | wordr0[2]; w0_t[3] = wordl0[3] | wordr0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = wordl1[0] | wordr1[0]; w1_t[1] = wordl1[1] | wordr1[1]; w1_t[2] = wordl1[2] | wordr1[2]; w1_t[3] = wordl1[3] | wordr1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = wordl2[0] | wordr2[0]; w2_t[1] = wordl2[1] | wordr2[1]; w2_t[2] = wordl2[2] | wordr2[2]; w2_t[3] = wordl2[3] | wordr2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = wordl3[0] | wordr3[0]; w3_t[1] = wordl3[1] | wordr3[1]; @@ -278,32 +254,32 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = 
swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = 0; + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -401,12 +377,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -434,28 +410,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -564,28 +540,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = wordl0[0] | wordr0[0]; w0_t[1] = wordl0[1] | wordr0[1]; w0_t[2] = wordl0[2] | wordr0[2]; w0_t[3] = wordl0[3] | wordr0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = wordl1[0] | wordr1[0]; w1_t[1] = wordl1[1] | wordr1[1]; w1_t[2] = 
wordl1[2] | wordr1[2]; w1_t[3] = wordl1[3] | wordr1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = wordl2[0] | wordr2[0]; w2_t[1] = wordl2[1] | wordr2[1]; w2_t[2] = wordl2[2] | wordr2[2]; w2_t[3] = wordl2[3] | wordr2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = wordl3[0] | wordr3[0]; w3_t[1] = wordl3[1] | wordr3[1]; @@ -670,32 +646,32 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = 0; + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -796,12 +772,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo we = rotl32 ((wb ^ w6 ^ 
w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m04900_a3.cl b/OpenCL/m04900_a3.cl similarity index 87% rename from amd/m04900_a3.cl rename to OpenCL/m04900_a3.cl index 30ccc70..c5dce61 100644 --- a/amd/m04900_a3.cl +++ b/OpenCL/m04900_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m04900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m04900m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -149,7 +125,7 @@ static void m04900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -157,28 +133,28 @@ static void m04900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -207,32 +183,32 @@ static void m04900m 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[1] |= salt_buf3[1]; w3_t[2] |= salt_buf3[2]; - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = swap_workaround (w3_t[2]); - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = swap_workaround (w3_t[2]); + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -330,16 +306,16 @@ static void m04900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include 
VECT_COMPARE_M + #include COMPARE_M } } -static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m04900s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -462,7 +438,7 @@ static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 
il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -470,28 +446,28 @@ static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -520,32 +496,32 @@ static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[1] |= salt_buf3[1]; w3_t[2] |= salt_buf3[2]; - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = swap_workaround (w3_t[2]); - u32x wf = pw_salt_len * 8; + u32 w0 = swap_workaround (w0_t[0]); + u32 w1 = swap_workaround (w0_t[1]); + u32 w2 = swap_workaround (w0_t[2]); + u32 w3 = swap_workaround (w0_t[3]); + u32 w4 = swap_workaround (w1_t[0]); + u32 w5 = swap_workaround (w1_t[1]); + u32 w6 = swap_workaround (w1_t[2]); + u32 w7 = swap_workaround (w1_t[3]); + u32 w8 = swap_workaround (w2_t[0]); + u32 w9 = swap_workaround (w2_t[1]); + u32 wa = swap_workaround (w2_t[2]); + u32 wb = swap_workaround (w2_t[3]); + u32 wc = swap_workaround (w3_t[0]); + u32 wd = swap_workaround (w3_t[1]); + u32 we = swap_workaround (w3_t[2]); + u32 wf = pw_salt_len * 8; /** * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = 
SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -646,12 +622,12 @@ static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -671,28 +647,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -724,28 +700,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -777,28 +753,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; 
w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -830,28 +806,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -883,28 +859,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -936,28 +912,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m04900_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m05000_a0.cl b/OpenCL/m05000_a0.cl similarity index 91% rename from amd/m05000_a0.cl rename to OpenCL/m05000_a0.cl index 6f505fa..08371b4 100644 --- a/amd/m05000_a0.cl +++ b/OpenCL/m05000_a0.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define 
VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 2 #define DGST_R1 3 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 keccakf_rndc[24] = { @@ -119,14 +100,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -151,28 +132,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -183,7 +164,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo append_0x01_2 (w0, w1, out_len); - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -229,13 +210,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo { // Theta - u64x bc0 = Theta1 
(0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); + u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -285,12 +266,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 (st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -318,14 +299,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -362,28 +343,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -394,7 +375,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo append_0x01_2 (w0, w1, out_len); - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -440,13 +421,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m05000_s04 (__glo { // Theta - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); + u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -496,12 +477,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 (st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05000_a1.cl b/OpenCL/m05000_a1.cl similarity index 92% rename from amd/m05000_a1.cl rename to OpenCL/m05000_a1.cl index 6d0686b..1edd1b9 100644 --- a/amd/m05000_a1.cl +++ b/OpenCL/m05000_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 2 #define DGST_R1 3 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 keccakf_rndc[24] = { @@ -117,28 +98,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo if (gid >= 
gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -209,35 +190,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -283,13 +264,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo { // Theta - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); + u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -339,12 +320,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = 
l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 (st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -372,28 +353,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -476,35 +457,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -550,13 +531,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo { // Theta - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); 
+ u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -606,12 +587,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 (st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05000_a3.cl b/OpenCL/m05000_a3.cl similarity index 85% rename from amd/m05000_a3.cl rename to OpenCL/m05000_a3.cl index ddaf805..516aaf2 100644 --- a/amd/m05000_a3.cl +++ b/OpenCL/m05000_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 2 #define DGST_R1 3 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 keccakf_rndc[24] = { @@ -101,7 +82,7 @@ __constant u32 keccakf_piln[24] = st[4 + s] ^= ~bc0 & bc1; \ } -static void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -124,7 +105,7 @@ static void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -132,7 +113,7 @@ static void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -182,13 +163,13 @@ static 
void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p { // Theta - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); + u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -238,16 +219,16 @@ static void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 (st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -282,7 +263,7 @@ static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -290,7 +271,7 @@ static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u64x st[25]; + u64 st[25]; #ifdef VECT_SIZE1 st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; @@ -340,13 +321,13 @@ static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p { // Theta - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); + u64 bc0 = Theta1 (0); + u64 bc1 = Theta1 (1); + u64 bc2 = Theta1 (2); + u64 bc3 = Theta1 (3); + u64 bc4 = Theta1 (4); - u64x t; + u64 t; t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); @@ -396,12 +377,12 @@ static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p st[0] ^= keccakf_rndc[round]; } - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); + const u32 r0 = l32_from_64 (st[1]); + const u32 r1 = h32_from_64 (st[1]); + const u32 r2 = l32_from_64 
(st[2]); + const u32 r3 = h32_from_64 (st[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -415,28 +396,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -462,28 +443,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -509,28 +490,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -556,28 +537,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; 
w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -603,28 +584,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -650,28 +631,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05000_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m05100_a0.cl b/OpenCL/m05100_a0.cl similarity index 90% rename from amd/m05100_a0.cl rename to OpenCL/m05100_a0.cl index 599ee68..9e34cd3 100644 --- a/amd/m05100_a0.cl +++ b/OpenCL/m05100_a0.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -57,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -79,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -113,10 +94,10 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -192,30 +173,30 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -244,14 +225,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -278,28 +259,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ 
-312,10 +293,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo w3[2] = out_len * 8; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -391,30 +372,30 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } } diff --git a/amd/m05100_a1.cl b/OpenCL/m05100_a1.cl similarity index 91% rename from amd/m05100_a1.cl rename to OpenCL/m05100_a1.cl index dccb8f7..974138d 100644 --- a/amd/m05100_a1.cl +++ b/OpenCL/m05100_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -55,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -135,38 +116,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] 
= wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -242,30 +223,30 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } } @@ -294,28 +275,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ 
-386,38 +367,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -493,30 +474,30 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } } diff --git a/amd/m05100_a3.cl b/OpenCL/m05100_a3.cl similarity index 84% rename from amd/m05100_a3.cl rename to OpenCL/m05100_a3.cl index dba6d8a..4a3cd85 100644 --- a/amd/m05100_a3.cl +++ b/OpenCL/m05100_a3.cl @@ -8,38 +8,19 @@ 
#include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m05100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -58,7 +39,7 @@ static void m05100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -66,10 +47,10 @@ static void m05100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -145,35 +126,35 @@ static void m05100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } } -static void m05100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, 
__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -204,7 +185,7 @@ static void m05100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -212,10 +193,10 @@ static void m05100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x 
w3[4], const u32 p w0[0] = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -291,30 +272,30 @@ static void m05100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += MD5M_D; { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = b; + const u32 r1 = c; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = c; + const u32 r1 = d; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } } @@ -329,28 +310,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -376,28 +357,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 
0; @@ -423,28 +404,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -470,28 +451,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -517,28 +498,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -564,28 +545,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05100_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; 
w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m05200.cl b/OpenCL/m05200.cl similarity index 89% rename from amd/m05200.cl rename to OpenCL/m05200.cl index b9209fc..b160a1c 100644 --- a/amd/m05200.cl +++ b/OpenCL/m05200.cl @@ -8,37 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 k_sha256[64] = @@ -61,33 +49,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = 
digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -157,28 +145,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05200_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -275,7 +263,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05200_init (__gl * main */ - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -308,7 +296,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05200_loop (__gl if (gid >= gid_max) return; - u32x digest[8]; + u32 digest[8]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -325,28 +313,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05200_loop (__gl for (u32 i = 0; i < loop_cnt; i++) { - u32x w0[4]; + u32 w0[4]; w0[0] = digest[0]; w0[1] = digest[1]; w0[2] = digest[2]; w0[3] = digest[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = digest[4]; w1[1] = digest[5]; w1[2] = digest[6]; w1[3] = digest[7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0x80000000; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -391,12 +379,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05200_comp (__gl * digest */ - const u32x r0 = 
tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m05300_a0.cl b/OpenCL/m05300_a0.cl similarity index 91% rename from amd/m05300_a0.cl rename to OpenCL/m05300_a0.cl index d979060..8bc27ca 100644 --- a/amd/m05300_a0.cl +++ b/OpenCL/m05300_a0.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x 
w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -139,7 +120,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -190,7 +171,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -238,14 +219,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -306,28 +287,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x 
w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -340,36 +321,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -390,7 +371,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -457,12 +438,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -488,14 +469,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -568,28 +549,28 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -602,36 +583,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -652,7 +633,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -719,12 +700,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05300_a1.cl b/OpenCL/m05300_a1.cl similarity index 92% rename from amd/m05300_a1.cl rename to OpenCL/m05300_a1.cl index 3428cc3..55b276a 100644 --- a/amd/m05300_a1.cl +++ b/OpenCL/m05300_a1.cl @@ -8,60 +8,41 @@ #include "include/constants.h" 
#include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -137,7 +118,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] 
+= d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -188,7 +169,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -236,28 +217,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -360,28 +341,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -392,36 
+373,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = w3[2]; w3_t[3] = w3[3]; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -442,7 +423,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -509,12 +490,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -540,28 +521,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -676,28 +657,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo 
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -708,36 +689,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -758,7 +739,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -825,12 +806,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05300_a3.cl b/OpenCL/m05300_a3.cl similarity index 87% rename from amd/m05300_a3.cl rename to 
OpenCL/m05300_a3.cl index a6885a6..73dc951 100644 --- a/amd/m05300_a3.cl +++ b/OpenCL/m05300_a3.cl @@ -8,60 +8,41 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, 
d, a, b, c, w1_t, MD5C01, MD5S01); @@ -137,7 +118,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -188,7 +169,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -222,7 +203,7 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05300m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t 
*pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -270,7 +251,7 @@ static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -282,36 +263,36 @@ static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = w3[2]; w3_t[3] = w3[3]; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -332,7 +313,7 @@ static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -399,16 +380,16 @@ static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const 
u32 p hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05300s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -468,7 +449,7 @@ static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -480,36 +461,36 @@ static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = w3[2]; w3_t[3] = w3[3]; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -530,7 +511,7 @@ static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = (64 + nr_len) * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -597,12 +578,12 @@ static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -620,28 +601,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 
w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -686,28 +667,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -752,28 +733,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_m16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -818,28 +799,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -884,28 +865,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; 
w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -950,28 +931,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05300_s16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m05400_a0.cl b/OpenCL/m05400_a0.cl similarity index 92% rename from amd/m05400_a0.cl rename to OpenCL/m05400_a0.cl index 9fe4e04..fa3c4ea 100644 --- a/amd/m05400_a0.cl +++ b/OpenCL/m05400_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void 
sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -174,7 +150,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -227,7 +203,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -277,14 +253,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m05400_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -345,28 +321,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -379,36 +355,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -429,7 +405,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -496,12 +472,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const 
u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -527,14 +503,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -607,28 +583,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -641,36 +617,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -691,7 +667,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 
digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -758,12 +734,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05400_a1.cl b/OpenCL/m05400_a1.cl similarity index 92% rename from amd/m05400_a1.cl rename to OpenCL/m05400_a1.cl index a45c82b..a9c9aff 100644 --- a/amd/m05400_a1.cl +++ b/OpenCL/m05400_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x 
w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -275,28 +251,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x 
wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -399,28 +375,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -431,36 +407,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -481,7 +457,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -548,12 +524,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, 
digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -579,28 +555,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -715,28 +691,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -747,36 +723,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = 
swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -797,7 +773,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -864,12 +840,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05400_a3.cl b/OpenCL/m05400_a3.cl similarity index 88% rename from amd/m05400_a3.cl rename to OpenCL/m05400_a3.cl index b56d711..ca557ca 100644 --- a/amd/m05400_a3.cl +++ b/OpenCL/m05400_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 
opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -261,7 +237,7 @@ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, digest); } -static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -309,7 +285,7 @@ static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -321,36 +297,36 @@ static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -371,7 +347,7 @@ static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -438,16 +414,16 @@ static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) +static void m05400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global ikepsk_t *ikepsk_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_msg_buf[128]) { /** * modifier @@ -507,7 +483,7 @@ static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -519,36 +495,36 @@ static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 
w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -569,7 +545,7 @@ static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + nr_len) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); @@ -636,12 +612,12 @@ static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -659,28 +635,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -725,28 +701,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_m08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -791,28 +767,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m05400_m16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -857,28 +833,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -923,28 +899,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -989,28 +965,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05400_s16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = 
pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m05500_a0.cl b/OpenCL/m05500_a0.cl similarity index 93% rename from amd/m05500_a0.cl rename to OpenCL/m05500_a0.cl index 2b9c851..34a346b 100644 --- a/amd/m05500_a0.cl +++ b/OpenCL/m05500_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -361,16 +337,16 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); @@ -403,9 +379,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x 
data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -430,13 +406,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -445,12 +421,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -462,7 +438,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) +static void transform_netntlmv1_key (const u32 w0, 
const u32 w1, u32 out[2]) { #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); @@ -575,14 +551,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -628,7 +604,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -639,28 +615,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -671,20 +647,20 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -749,16 +725,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); 
- u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -770,7 +746,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); @@ -778,12 +754,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -809,14 +785,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -862,7 +838,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -885,28 +861,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -917,20 +893,20 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m05500_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -995,16 +971,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -1017,12 +993,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); */ - u32x iv2[2]; + u32 iv2[2]; iv2[0] = search[2]; iv2[1] = search[3]; @@ -1031,12 +1007,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05500_a1.cl b/OpenCL/m05500_a1.cl similarity index 93% rename from amd/m05500_a1.cl rename to OpenCL/m05500_a1.cl index a85924d..9f0ff9d 100644 --- a/amd/m05500_a1.cl +++ b/OpenCL/m05500_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define 
VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -359,16 +335,16 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); @@ -401,9 +377,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -428,13 +404,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 
0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -443,12 +419,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -460,7 +436,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) +static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); @@ -573,28 +549,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 
7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -647,7 +623,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -695,48 +671,48 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -801,16 +777,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -822,7 +798,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo _des_crypt_keysetup 
(key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); @@ -830,12 +806,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -861,28 +837,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -935,7 +911,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -995,48 +971,48 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - 
u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); @@ -1101,16 +1077,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -1123,12 +1099,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); */ - u32x iv2[2]; + u32 iv2[2]; iv2[0] = search[2]; iv2[1] = search[3]; @@ -1137,12 +1113,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05500_a3.cl b/OpenCL/m05500_a3.cl similarity index 80% rename from amd/m05500_a3.cl rename to OpenCL/m05500_a3.cl index f2196ac..dcbb017 100644 --- a/amd/m05500_a3.cl +++ b/OpenCL/m05500_a3.cl @@ -4,46 +4,21 @@ */ #define _MD4_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 
#define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -360,16 +335,16 @@ __constant u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i += 2) { - u32x u; - u32x t; + u32 u; + u32 t; u = Kc[i + 0] ^ rotl32 (r, 30u); t = Kd[i + 0] ^ rotl32 (r, 26u); @@ -402,9 +377,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -429,13 +404,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; 
- const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -444,12 +419,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -461,7 +436,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) +static void transform_netntlmv1_key (const u32 w0, const u32 w1, u32 out[2]) { #ifdef VECT_SIZE1 const uchar4 t0 = as_uchar4 (w0); @@ -560,7 +535,7 @@ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) #endif } -static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -577,7 +552,7 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -586,20 +561,18 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; 
il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; #define w0_t w0 #define w1_t w[ 1] @@ -681,16 +654,16 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -702,7 +675,7 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); @@ -710,16 +683,16 @@ static void m05500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global 
void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -736,7 +709,7 @@ static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - u32x data[2]; + u32 data[2]; data[0] = s0; data[1] = s1; @@ -757,20 +730,18 @@ static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; + u32 
a = MD4M_A; + u32 b = MD4M_B; + u32 c = MD4M_C; + u32 d = MD4M_D; #define w0_t w0 #define w1_t w[ 1] @@ -852,16 +823,16 @@ static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * DES1 */ - u32x key[2]; + u32 key[2]; transform_netntlmv1_key (a, b, key); - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv1[2]; + u32 iv1[2]; _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); @@ -874,12 +845,12 @@ static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - u32x iv2[2]; + u32 iv2[2]; _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); */ - u32x iv2[2]; + u32 iv2[2]; iv2[0] = search[2]; iv2[1] = search[3]; @@ -888,16 +859,16 @@ static void m05500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * compare */ - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; + const u32 r0 = iv1[0]; + const u32 r1 = iv1[1]; + const u32 r2 = iv2[0]; + const u32 r3 = iv2[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -963,7 +934,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m04 (__glo m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1029,11 +1000,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m08 (__glo m05500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m05500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, 
__global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1099,7 +1070,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s04 (__glo m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1165,6 +1136,6 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s08 (__glo m05500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m05600_a0.cl b/OpenCL/m05600_a0.cl similarity index 91% rename from amd/m05600_a0.cl rename to OpenCL/m05600_a0.cl index fdd43e9..d65e1a5 100644 --- a/amd/m05600_a0.cl +++ b/OpenCL/m05600_a0.cl @@ -8,62 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = 
w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); @@ -122,29 +103,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP 
(MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -220,7 +201,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -271,7 +252,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -319,14 +300,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -370,28 +351,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -402,17 +383,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, 
w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -443,8 +424,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -557,12 +538,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -588,14 +569,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -651,28 +632,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -683,17 +664,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, 
w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -724,8 +705,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -838,12 +819,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05600_a1.cl b/OpenCL/m05600_a1.cl similarity index 91% rename from amd/m05600_a1.cl rename to OpenCL/m05600_a1.cl index 44a698f..89fc1b3 100644 --- a/amd/m05600_a1.cl +++ b/OpenCL/m05600_a1.cl @@ -8,60 +8,41 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const 
u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); @@ -120,29 +101,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = 
w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -218,7 +199,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -269,7 +250,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -317,28 +298,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -426,45 +407,45 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -495,8 +476,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -609,12 +590,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -640,28 +621,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -761,45 +742,45 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, 
wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -830,8 +811,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -944,12 +925,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m05600_a3.cl b/OpenCL/m05600_a3.cl similarity index 86% rename from amd/m05600_a3.cl rename to OpenCL/m05600_a3.cl index 1f375bf..c0c9b72 100644 --- a/amd/m05600_a3.cl +++ b/OpenCL/m05600_a3.cl @@ -8,60 +8,41 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" 
-#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); @@ -120,29 +101,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = 
digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -218,7 +199,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -269,7 +250,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -303,7 +284,7 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t 
*combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global netntlm_t *netntlm_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_userdomain_buf[64], __local u32 s_chall_buf[256]) +static void m05600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global netntlm_t *netntlm_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_userdomain_buf[64], __local u32 s_chall_buf[256]) { /** * modifier @@ -326,7 +307,7 @@ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -334,7 
+315,7 @@ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -343,10 +324,10 @@ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p md4_transform (w0, w1, w2, w3, digest); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = digest[0]; w0_t[1] = digest[1]; @@ -370,8 +351,8 @@ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -484,16 +465,16 @@ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global netntlm_t *netntlm_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_userdomain_buf[64], __local u32 s_chall_buf[256]) +static void m05600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global netntlm_t *netntlm_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_userdomain_buf[64], __local u32 s_chall_buf[256]) { /** * modifier @@ -528,7 +509,7 @@ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -536,7 +517,7 @@ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x digest[4]; + u32 digest[4]; digest[0] = MD4M_A; digest[1] = MD4M_B; @@ -545,10 +526,10 @@ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p md4_transform (w0, w1, w2, w3, digest); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = digest[0]; w0_t[1] = digest[1]; @@ -572,8 +553,8 @@ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p digest[2] = MD5M_C; digest[3] = MD5M_D; - u32x 
ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -686,12 +667,12 @@ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[0]; + const u32 r1 = digest[3]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -709,28 +690,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -783,28 +764,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_m08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -861,28 +842,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05600_s04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -935,28 +916,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m05600_s08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m05800.cl b/OpenCL/m05800.cl similarity index 95% rename from amd/m05800.cl rename to OpenCL/m05800.cl index cf289c5..c18e38c 100644 --- a/amd/m05800.cl +++ b/OpenCL/m05800.cl @@ -8,33 +8,21 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif typedef struct @@ -176,7 +164,7 @@ __constant entry_t pc[1024] = 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, }; -static void append_word (u32x w0[4], u32x w1[4], const u32x append[4], const u32 offset) +static void append_word (u32 w0[4], u32 w1[4], const u32 append[4], const u32 offset) { switch (offset) { @@ -210,7 +198,7 @@ static void append_word (u32x w0[4], u32x w1[4], const u32x append[4], const u32 } } -static void append_salt (u32x w0[4], u32x w1[4], u32x w2[4], const u32 append[5], const u32 offset) +static void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], 
const u32 offset) { switch (offset) { @@ -374,30 +362,30 @@ static void append_salt (u32x w0[4], u32x w1[4], u32x w2[4], const u32 append[5] } } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -512,7 +500,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_init (__gl if (gid >= gid_max) return; - u32x word_buf[4]; + u32 word_buf[4]; word_buf[0] = pws[gid].i[ 0]; word_buf[1] = pws[gid].i[ 1]; @@ -542,9 +530,9 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_init (__gl const u32 pc_len = 1; const u32 pc_dec = 0x30; - u32x data0[4] = { 0, 0, 0, 0 }; - u32x data1[4] = { 0, 0, 0, 0 }; - u32x data2[4] = { 0, 0, 0, 0 }; + u32 data0[4] = { 0, 0, 0, 0 }; + u32 data1[4] = { 0, 0, 0, 0 }; + u32 data2[4] = { 0, 0, 0, 0 }; data0[0] = pc_dec; @@ -552,10 +540,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_init (__gl append_salt (data0, 
data1, data2, salt_buf, pc_len + pw_len); - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = swap_workaround (data0[0]); w0[1] = swap_workaround (data0[1]); @@ -574,7 +562,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_init (__gl w3[2] = 0; w3[3] = (pc_len + pw_len + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -599,7 +587,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_loop (__gl const u32 gid = get_global_id (0); - u32x word_buf[4]; + u32 word_buf[4]; word_buf[0] = pws[gid].i[ 0]; word_buf[1] = pws[gid].i[ 1]; @@ -610,7 +598,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_loop (__gl const u32 lid = get_local_id (0); - u32x digest[5]; + u32 digest[5]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -670,9 +658,9 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_loop (__gl const u32 pc_len = s_pc[j].len; const u32 pc_dec = s_pc[j].dec; - u32x data0[4] = { 0, 0, 0, 0 }; - u32x data1[4] = { 0, 0, 0, 0 }; - u32x data2[4] = { 0, 0, 0, 0 }; + u32 data0[4] = { 0, 0, 0, 0 }; + u32 data1[4] = { 0, 0, 0, 0 }; + u32 data2[4] = { 0, 0, 0, 0 }; data0[0] = pc_dec; @@ -680,10 +668,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_loop (__gl append_salt (data0, data1, data2, salt_buf, pc_len + pw_len); - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = digest[0]; w0[1] = digest[1]; @@ -734,12 +722,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m05800_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 
r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06000_a0.cl b/OpenCL/m06000_a0.cl similarity index 94% rename from amd/m06000_a0.cl rename to OpenCL/m06000_a0.cl index 0d8448b..da1d4dc 100644 --- a/amd/m06000_a0.cl +++ b/OpenCL/m06000_a0.cl @@ -8,46 +8,27 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -134,11 +115,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, 
RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -225,11 +206,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; dgst[1] = b; @@ -254,14 +235,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -276,28 +257,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -308,7 +289,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m06000_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -327,7 +308,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo wl[14] = out_len * 8; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = RIPEMD160M_B; @@ -337,12 +318,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -370,14 +351,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -404,28 +385,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -436,7 +417,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -455,7 +436,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo wl[14] = out_len * 8; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = 
RIPEMD160M_B; @@ -465,12 +446,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06000_a1.cl b/OpenCL/m06000_a1.cl similarity index 94% rename from amd/m06000_a1.cl rename to OpenCL/m06000_a1.cl index 8a52770..94af3b2 100644 --- a/amd/m06000_a1.cl +++ b/OpenCL/m06000_a1.cl @@ -8,44 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -132,11 +113,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) 
RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -223,11 +204,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; dgst[1] = b; @@ -252,28 +233,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -332,35 +313,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = 
wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -379,7 +360,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo wl[14] = pw_len * 8; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = RIPEMD160M_B; @@ -389,12 +370,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -422,28 +403,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -514,35 +495,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; 
w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = pw_len * 8; w3[3] = 0; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -561,7 +542,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo wl[14] = pw_len * 8; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = RIPEMD160M_B; @@ -571,12 +552,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06000_a3.cl b/OpenCL/m06000_a3.cl similarity index 88% rename from amd/m06000_a3.cl rename to OpenCL/m06000_a3.cl index 0e4d96a..b4ccd38 100644 --- a/amd/m06000_a3.cl +++ b/OpenCL/m06000_a3.cl @@ -8,44 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef 
VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -132,11 +113,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -223,11 +204,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; 
dgst[1] = b; @@ -236,7 +217,7 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) dgst[4] = e; } -static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m06000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -255,7 +236,7 @@ static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x 
w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -263,7 +244,7 @@ static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -282,7 +263,7 @@ static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wl[14] = w14; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = RIPEMD160M_B; @@ -292,16 +273,16 @@ static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m06000s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void 
*tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -332,7 +313,7 @@ static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -340,7 +321,7 @@ static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -359,7 +340,7 @@ static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wl[14] = w14; wl[15] = 0; - u32x dgst[5]; + u32 dgst[5]; dgst[0] = RIPEMD160M_A; dgst[1] = RIPEMD160M_B; @@ -369,12 +350,12 @@ static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p ripemd160_transform (wl, dgst); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -388,28 +369,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] 
= 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -435,28 +416,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -482,28 +463,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -529,28 +510,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -576,28 +557,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = 
pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -623,28 +604,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06000_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m06100_a0.cl b/OpenCL/m06100_a0.cl similarity index 96% rename from amd/m06100_a0.cl rename to OpenCL/m06100_a0.cl index c7b5056..0ac351b 100644 --- a/amd/m06100_a0.cl +++ b/OpenCL/m06100_a0.cl @@ -8,47 +8,28 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define R 10 #ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) +#define BOX(S,n,i) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(S,n,i) 
u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif __constant u32 Ch[8][256] = @@ -1147,10 +1128,10 @@ __constant u32 rcl[R + 1] = // this is a highly optimized that assumes dgst[16] = { 0 }; only reuse of no 2nd transform is needed -static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) +static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { - u32x Kh[8]; - u32x Kl[8]; + u32 Kh[8]; + u32 Kl[8]; Kh[0] = 0x300beec0; Kl[0] = 0xaf902967; @@ -1169,8 +1150,8 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ Kh[7] = 0x28282828; Kl[7] = 0x28282828; - u32x stateh[8]; - u32x statel[8]; + u32 stateh[8]; + u32 statel[8]; stateh[0] = w[ 0]; statel[0] = w[ 1]; @@ -1189,20 +1170,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ stateh[7] = w[14]; statel[7] = w[15]; - u32x Lh[8]; - u32x Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1242,20 +1223,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ for (int r = 2; r <= R; r++) { - u32x Lh[8]; - u32x 
Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) & 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; + const u32 Lp0 = Kh[(i + 8) & 7] >> 24; + const u32 Lp1 = Kh[(i + 7) & 7] >> 16; + const u32 Lp2 = Kh[(i + 6) & 7] >> 8; + const u32 Lp3 = Kh[(i + 5) & 7] >> 0; + const u32 Lp4 = Kl[(i + 4) & 7] >> 24; + const u32 Lp5 = Kl[(i + 3) & 7] >> 16; + const u32 Lp6 = Kl[(i + 2) & 7] >> 8; + const u32 Lp7 = Kl[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1296,14 +1277,14 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ #pragma unroll 8 for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1374,14 +1355,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; 
pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -1421,28 +1402,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1453,7 +1434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x wl[16]; + u32 wl[16]; wl[ 0] = swap_workaround (w0[0]); wl[ 1] = swap_workaround (w0[1]); @@ -1472,7 +1453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo wl[14] = 0; wl[15] = out_len * 8; - u32x dgst[16]; + u32 dgst[16]; dgst[ 0] = 0; dgst[ 1] = 0; @@ -1493,12 +1474,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -1524,14 +1505,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -1583,28 +1564,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; 
il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1615,7 +1596,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x wl[16]; + u32 wl[16]; wl[ 0] = swap_workaround (w0[0]); wl[ 1] = swap_workaround (w0[1]); @@ -1634,7 +1615,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo wl[14] = 0; wl[15] = out_len * 8; - u32x dgst[16]; + u32 dgst[16]; dgst[ 0] = 0; dgst[ 1] = 0; @@ -1655,12 +1636,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06100_a1.cl b/OpenCL/m06100_a1.cl similarity index 96% rename from amd/m06100_a1.cl rename to OpenCL/m06100_a1.cl index 491c7fd..f644a5b 100644 --- a/amd/m06100_a1.cl +++ b/OpenCL/m06100_a1.cl @@ -8,45 +8,26 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define R 10 #ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) +#define BOX(S,n,i) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(S,n,i) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif __constant u32 Ch[8][256] = @@ -1145,10 +1126,10 @@ __constant u32 rcl[R + 1] = // this is a highly optimized that assumes dgst[16] = { 0 }; only reuse of no 2nd transform is needed -static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) +static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { - u32x Kh[8]; - u32x Kl[8]; + u32 Kh[8]; + u32 Kl[8]; Kh[0] = 0x300beec0; Kl[0] = 0xaf902967; @@ -1167,8 +1148,8 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ Kh[7] = 0x28282828; Kl[7] = 0x28282828; - u32x stateh[8]; - u32x statel[8]; + u32 stateh[8]; + u32 statel[8]; stateh[0] = w[ 0]; statel[0] = w[ 1]; @@ -1187,20 +1168,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ stateh[7] = w[14]; statel[7] = w[15]; - u32x Lh[8]; - u32x Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 
Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1240,20 +1221,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ for (int r = 2; r <= R; r++) { - u32x Lh[8]; - u32x Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) & 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; + const u32 Lp0 = Kh[(i + 8) & 7] >> 24; + const u32 Lp1 = Kh[(i + 7) & 7] >> 16; + const u32 Lp2 = Kh[(i + 6) & 7] >> 8; + const u32 Lp3 = Kh[(i + 5) & 7] >> 0; + const u32 Lp4 = Kl[(i + 4) & 7] >> 24; + const u32 Lp5 = Kl[(i + 3) & 7] >> 16; + const u32 Lp6 = Kl[(i + 2) & 7] >> 8; + const u32 Lp7 = Kl[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1294,14 +1275,14 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ #pragma unroll 8 for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 
7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1372,28 +1353,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -1477,35 +1458,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = 0; w3[3] = 0; - u32x wl[16]; + u32 wl[16]; wl[ 0] = swap_workaround (w0[0]); wl[ 1] = swap_workaround (w0[1]); @@ -1524,7 +1505,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo wl[14] = 0; wl[15] = pw_len * 8; - u32x dgst[16]; + u32 dgst[16]; dgst[ 0] = 0; dgst[ 1] = 0; @@ -1545,12 +1526,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; 
+ const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -1576,28 +1557,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -1693,35 +1674,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = 0; w3[3] = 0; - u32x wl[16]; + u32 wl[16]; wl[ 0] = swap_workaround (w0[0]); wl[ 1] = swap_workaround (w0[1]); @@ -1740,7 +1721,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo wl[14] = 0; wl[15] = pw_len * 8; - u32x dgst[16]; + u32 dgst[16]; dgst[ 0] = 0; dgst[ 1] = 0; @@ -1761,12 +1742,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x 
r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06100_a3.cl b/OpenCL/m06100_a3.cl similarity index 94% rename from amd/m06100_a3.cl rename to OpenCL/m06100_a3.cl index ff12c0e..9794fe9 100644 --- a/amd/m06100_a3.cl +++ b/OpenCL/m06100_a3.cl @@ -8,45 +8,26 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define R 10 #ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) +#define BOX(S,n,i) u32 ((S)[(n)][(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#define BOX(S,n,i) u32 ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) #endif __constant u32 Ch[8][256] = @@ -1145,10 +1126,10 @@ __constant u32 rcl[R + 1] = // this is a highly optimized that assumes dgst[16] = { 0 }; only reuse of no 2nd transform is needed -static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) +static void whirlpool_transform (const u32 w[16], u32 dgst[16], __local u32 s_Ch[8][256], __local u32 s_Cl[8][256]) { - u32x Kh[8]; - u32x Kl[8]; + u32 Kh[8]; + u32 Kl[8]; Kh[0] = 0x300beec0; Kl[0] = 0xaf902967; @@ -1167,8 +1148,8 @@ static void 
whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ Kh[7] = 0x28282828; Kl[7] = 0x28282828; - u32x stateh[8]; - u32x statel[8]; + u32 stateh[8]; + u32 statel[8]; stateh[0] = w[ 0]; statel[0] = w[ 1]; @@ -1187,20 +1168,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ stateh[7] = w[14]; statel[7] = w[15]; - u32x Lh[8]; - u32x Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1240,20 +1221,20 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ for (int r = 2; r <= R; r++) { - u32x Lh[8]; - u32x Ll[8]; + u32 Lh[8]; + u32 Ll[8]; #pragma unroll for (int i = 0; i < 8; i++) { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) & 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; + const u32 Lp0 = Kh[(i + 8) & 7] >> 24; + const u32 Lp1 = Kh[(i + 7) & 7] >> 16; + const u32 Lp2 = Kh[(i + 6) & 7] >> 8; + const u32 Lp3 = Kh[(i + 5) & 7] >> 0; + const u32 Lp4 = Kl[(i + 4) 
& 7] >> 24; + const u32 Lp5 = Kl[(i + 3) & 7] >> 16; + const u32 Lp6 = Kl[(i + 2) & 7] >> 8; + const u32 Lp7 = Kl[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1294,14 +1275,14 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ #pragma unroll 8 for (int i = 0; i < 8; i++) { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; + const u32 Lp0 = stateh[(i + 8) & 7] >> 24; + const u32 Lp1 = stateh[(i + 7) & 7] >> 16; + const u32 Lp2 = stateh[(i + 6) & 7] >> 8; + const u32 Lp3 = stateh[(i + 5) & 7] >> 0; + const u32 Lp4 = statel[(i + 4) & 7] >> 24; + const u32 Lp5 = statel[(i + 3) & 7] >> 16; + const u32 Lp6 = statel[(i + 2) & 7] >> 8; + const u32 Lp7 = statel[(i + 1) & 7] >> 0; Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) ^ BOX (s_Ch, 1, Lp1 & 0xff) @@ -1358,7 +1339,7 @@ static void whirlpool_transform (const u32x w[16], u32x dgst[16], __local u32 s_ dgst[15] = statel[7] ^ w[15]; } -static void m06100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_Cl[8][256], __local u32 s_Ch[8][256]) +static void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_Cl[8][256], __local u32 s_Ch[8][256]) { /** * modifier @@ -1371,7 +1352,7 @@ static void m06100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -1379,7 +1360,7 @@ static void m06100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -1398,20 +1379,20 @@ static void m06100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wl[14] = 0; wl[15] = pw_len * 8; - u32x dgst[16]; + u32 dgst[16]; whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - 
#include VECT_COMPARE_M + #include COMPARE_M } } -static void m06100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_Cl[8][256], __local u32 s_Ch[8][256]) +static void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_Cl[8][256], __local u32 s_Ch[8][256]) { /** * modifier @@ -1436,7 +1417,7 @@ static void m06100s 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -1444,7 +1425,7 @@ static void m06100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x wl[16]; + u32 wl[16]; wl[ 0] = w0[0]; wl[ 1] = w0[1]; @@ -1463,16 +1444,16 @@ static void m06100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p wl[14] = 0; wl[15] = pw_len * 8; - u32x dgst[16]; + u32 dgst[16]; whirlpool_transform (wl, dgst, s_Ch, s_Cl); - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; + const u32 r0 = dgst[0]; + const u32 r1 = dgst[1]; + const u32 r2 = dgst[2]; + const u32 r3 = dgst[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -1490,28 +1471,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1566,28 +1547,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1642,28 +1623,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_m16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; 
w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1718,28 +1699,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s04 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1794,28 +1775,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s08 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1870,28 +1851,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06100_s16 (__glo const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m06211.cl b/OpenCL/m06211.cl similarity index 95% rename from 
amd/m06211.cl rename to OpenCL/m06211.cl index d4cc214..4238b1a 100644 --- a/amd/m06211.cl +++ b/OpenCL/m06211.cl @@ -8,38 +8,26 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" #include "gpu_serpent256_amd.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -126,11 +114,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -217,11 +205,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, 
d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; dgst[1] = b; @@ -230,7 +218,7 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) dgst[4] = e; } -static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[5], const u32 opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -241,7 +229,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w1, dgst); ripemd160_transform (w2, dgst); - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -269,7 +257,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w, dgst); } -static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run (u32 w[16], const u32 ipad[5], const u32 opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -305,7 +293,7 @@ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x d ripemd160_transform (w, dgst); } -static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) +static void hmac_init (u32 w[16], u32 ipad[5], u32 opad[5]) { w[ 0] ^= 0x36363636; w[ 1] ^= 0x36363636; @@ -393,28 +381,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 
5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -446,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl * salt */ - u32x salt_buf1[16]; + u32 salt_buf1[16]; salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -465,7 +453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - u32x salt_buf2[16]; + u32 salt_buf2[16]; salt_buf2[ 0] = 0; salt_buf2[ 1] = 0x80; @@ -486,7 +474,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -505,8 +493,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl w[14] = w3[2]; w[15] = w3[3]; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_init (w, ipad, opad); @@ -526,7 +514,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_init (__gl { salt_buf2[0] = swap_workaround (j); - u32x dgst[5]; + u32 dgst[5]; hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); @@ -552,8 +540,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -569,8 +557,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_loop (__gl for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) { - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[i + 0]; dgst[1] = tmps[gid].dgst[i + 1]; @@ -586,7 
+574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -680,7 +668,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -696,7 +684,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -712,7 +700,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06211_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06212.cl b/OpenCL/m06212.cl similarity index 95% rename from amd/m06212.cl rename to OpenCL/m06212.cl index 8d02bed..120801e 100644 --- a/amd/m06212.cl +++ b/OpenCL/m06212.cl @@ -8,38 +8,26 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" #include "gpu_serpent256_amd.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const 
u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -126,11 +114,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -217,11 +205,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; dgst[1] = b; @@ -230,7 +218,7 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) dgst[4] = e; } -static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[5], const u32 
opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -241,7 +229,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w1, dgst); ripemd160_transform (w2, dgst); - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -269,7 +257,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w, dgst); } -static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run (u32 w[16], const u32 ipad[5], const u32 opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -305,7 +293,7 @@ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x d ripemd160_transform (w, dgst); } -static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) +static void hmac_init (u32 w[16], u32 ipad[5], u32 opad[5]) { w[ 0] ^= 0x36363636; w[ 1] ^= 0x36363636; @@ -393,28 +381,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -446,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl * salt */ - u32x salt_buf1[16]; + u32 salt_buf1[16]; salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -465,7 +453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - u32x salt_buf2[16]; + 
u32 salt_buf2[16]; salt_buf2[ 0] = 0; salt_buf2[ 1] = 0x80; @@ -486,7 +474,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -505,8 +493,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl w[14] = w3[2]; w[15] = w3[3]; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_init (w, ipad, opad); @@ -526,7 +514,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_init (__gl { salt_buf2[0] = swap_workaround (j); - u32x dgst[5]; + u32 dgst[5]; hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); @@ -552,8 +540,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -569,8 +557,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_loop (__gl for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) { - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[i + 0]; dgst[1] = tmps[gid].dgst[i + 1]; @@ -586,7 +574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -680,7 +668,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -696,7 +684,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - 
mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -712,7 +700,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -751,7 +739,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -768,7 +756,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -785,7 +773,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06212_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06213.cl b/OpenCL/m06213.cl similarity index 94% rename from amd/m06213.cl rename to OpenCL/m06213.cl index c0ba879..41ae9f8 100644 --- a/amd/m06213.cl +++ b/OpenCL/m06213.cl @@ -8,38 +8,26 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include 
"types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" #include "gpu_serpent256_amd.c" -static void ripemd160_transform (const u32x w[16], u32x dgst[5]) +static void ripemd160_transform (const u32 w[16], u32 dgst[5]) { - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; + u32 a1 = dgst[0]; + u32 b1 = dgst[1]; + u32 c1 = dgst[2]; + u32 d1 = dgst[3]; + u32 e1 = dgst[4]; RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); @@ -126,11 +114,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; + u32 a2 = dgst[0]; + u32 b2 = dgst[1]; + u32 c2 = dgst[2]; + u32 d2 = dgst[3]; + u32 e2 = dgst[4]; RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); @@ -217,11 +205,11 @@ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; + const u32 a = dgst[1] + c1 + d2; + const u32 b = dgst[2] + d1 + e2; + const u32 c = dgst[3] + e1 + a2; + const u32 d = dgst[4] + a1 + b2; + const u32 e = dgst[0] + b1 + c2; dgst[0] = a; dgst[1] = b; @@ -230,7 +218,7 @@ 
static void ripemd160_transform (const u32x w[16], u32x dgst[5]) dgst[4] = e; } -static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[5], const u32 opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -241,7 +229,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w1, dgst); ripemd160_transform (w2, dgst); - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -269,7 +257,7 @@ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], ripemd160_transform (w, dgst); } -static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) +static void hmac_run (u32 w[16], const u32 ipad[5], const u32 opad[5], u32 dgst[5]) { dgst[0] = ipad[0]; dgst[1] = ipad[1]; @@ -305,7 +293,7 @@ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x d ripemd160_transform (w, dgst); } -static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) +static void hmac_init (u32 w[16], u32 ipad[5], u32 opad[5]) { w[ 0] ^= 0x36363636; w[ 1] ^= 0x36363636; @@ -393,28 +381,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -446,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl * salt */ - u32x salt_buf1[16]; + u32 salt_buf1[16]; salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 
0]; salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; @@ -465,7 +453,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - u32x salt_buf2[16]; + u32 salt_buf2[16]; salt_buf2[ 0] = 0; salt_buf2[ 1] = 0x80; @@ -486,7 +474,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -505,8 +493,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl w[14] = w3[2]; w[15] = w3[3]; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_init (w, ipad, opad); @@ -526,7 +514,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_init (__gl { salt_buf2[0] = swap_workaround (j); - u32x dgst[5]; + u32 dgst[5]; hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); @@ -552,8 +540,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -569,8 +557,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_loop (__gl for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) { - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[i + 0]; dgst[1] = tmps[gid].dgst[i + 1]; @@ -586,7 +574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w[16]; + u32 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -680,7 +668,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + 
mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -696,7 +684,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -712,7 +700,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -751,7 +739,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -768,7 +756,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -785,7 +773,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -825,7 +813,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + 
mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -843,7 +831,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06213_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06221.cl b/OpenCL/m06221.cl similarity index 98% rename from amd/m06221.cl rename to OpenCL/m06221.cl index 2935832..e6295e1 100644 --- a/amd/m06221.cl +++ b/OpenCL/m06221.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -281,28 +269,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06221_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -586,7 +574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06221_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -602,7 
+590,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06221_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -618,7 +606,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06221_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06222.cl b/OpenCL/m06222.cl similarity index 97% rename from amd/m06222.cl rename to OpenCL/m06222.cl index 040b593..e3d5ad8 100644 --- a/amd/m06222.cl +++ b/OpenCL/m06222.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -281,28 +269,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -586,7 +574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp 
(__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -602,7 +590,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -618,7 +606,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -657,7 +645,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -674,7 +662,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -691,7 +679,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06222_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06223.cl b/OpenCL/m06223.cl similarity index 97% rename from amd/m06223.cl 
rename to OpenCL/m06223.cl index 91ff4e1..670cf94 100644 --- a/amd/m06223.cl +++ b/OpenCL/m06223.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -281,28 +269,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -586,7 +574,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -602,7 +590,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -618,7 +606,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && 
((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -657,7 +645,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -674,7 +662,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -691,7 +679,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -731,7 +719,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -749,7 +737,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06223_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06231.cl b/OpenCL/m06231.cl similarity index 99% rename from amd/m06231.cl rename to OpenCL/m06231.cl index 90f30f6..dc74685 100644 --- a/amd/m06231.cl +++ 
b/OpenCL/m06231.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -1489,28 +1477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06231_init (__gl const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1965,7 +1953,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06231_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -1981,7 +1969,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06231_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -1997,7 +1985,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06231_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 
0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06232.cl b/OpenCL/m06232.cl similarity index 99% rename from amd/m06232.cl rename to OpenCL/m06232.cl index 0c22d6a..9cc55c7 100644 --- a/amd/m06232.cl +++ b/OpenCL/m06232.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -1489,28 +1477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_init (__gl const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1965,7 +1953,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -1981,7 +1969,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); 
d_return_buf[lid] = 1; } @@ -1997,7 +1985,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2036,7 +2024,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2053,7 +2041,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2070,7 +2058,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06232_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06233.cl b/OpenCL/m06233.cl similarity index 99% rename from amd/m06233.cl rename to OpenCL/m06233.cl index 87ea7e9..fb67506 100644 --- a/amd/m06233.cl +++ b/OpenCL/m06233.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include 
"gpu_aes256_amd.c" #include "gpu_twofish256_amd.c" @@ -1489,28 +1477,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_init (__gl const u32 gid = get_global_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1965,7 +1953,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -1981,7 +1969,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -1997,7 +1985,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2036,7 +2024,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2053,7 +2041,7 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2070,7 +2058,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2110,7 +2098,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } @@ -2128,7 +2116,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06233_comp (__gl if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); + mark_hash (plains_buf, hashes_shown, 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m06300.cl b/OpenCL/m06300.cl similarity index 96% rename from amd/m06300.cl rename to OpenCL/m06300.cl index 8ca2f62..c4ddb50 100644 --- a/amd/m06300.cl +++ b/OpenCL/m06300.cl @@ -8,60 +8,48 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = 0; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -137,7 +125,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) +static void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -572,7 +560,7 @@ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block } } -static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 
block_len, const u32x append[4]) +static void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4]) { switch (block_len) { @@ -1019,7 +1007,7 @@ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x b } } -static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) +static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2]) { switch (block_len) { @@ -1352,7 +1340,7 @@ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3 } } -static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) +static void append_1st (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append) { switch (block_len) { @@ -1596,7 +1584,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1625,28 +1613,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__gl u32 block_len = pw_len; - u32x block0[4]; + u32 block0[4]; block0[0] = w0[0]; block0[1] = w0[1]; block0[2] = w0[2]; block0[3] = w0[3]; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -1665,7 +1653,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__gl block3[2] = block_len * 8; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -1715,7 +1703,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_init (__gl /* Then something really weird... 
*/ - u32x append = block0[0] & 0xFF; + u32 append = block0[0] & 0xFF; for (u32 j = pw_len; j; j >>= 1) { @@ -1754,7 +1742,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1763,7 +1751,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__gl const u32 pw_len = pws[gid].pw_len; - u32x w0_x80[4]; + u32 w0_x80[4]; w0_x80[0] = w0[0]; w0_x80[1] = w0[1]; @@ -1787,7 +1775,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__gl * digest */ - u32x digest[4]; + u32 digest[4]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -1802,28 +1790,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_loop (__gl u32 block_len; - u32x block0[4]; + u32 block0[4]; block0[0] = 0; block0[1] = 0; block0[2] = 0; block0[3] = 0; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -1948,12 +1936,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06300_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; + const u32 r0 = tmps[gid].digest_buf[DGST_R0]; + const u32 r1 = tmps[gid].digest_buf[DGST_R1]; + const u32 r2 = tmps[gid].digest_buf[DGST_R2]; + const u32 r3 = tmps[gid].digest_buf[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06400.cl b/OpenCL/m06400.cl similarity index 89% rename from amd/m06400.cl rename to OpenCL/m06400.cl index 7c1d2af..d6f9a91 100644 --- a/amd/m06400.cl +++ b/OpenCL/m06400.cl @@ -8,37 +8,25 @@ 
#include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 k_sha256[64] = @@ -61,33 +49,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; 
+ u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -147,7 +135,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -206,7 +194,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -258,28 +246,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -345,8 +333,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); @@ -402,7 +390,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = (64 + salt_len + 4) * 8; - 
u32x dgst[8]; + u32 dgst[8]; hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -431,8 +419,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_loop (__gl if (gid >= gid_max) return; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -452,8 +440,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_loop (__gl opad[6] = tmps[gid].opad[6]; opad[7] = tmps[gid].opad[7]; - u32x dgst[8]; - u32x out[8]; + u32 dgst[8]; + u32 out[8]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -475,10 +463,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -544,23 +532,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06400_comp (__gl * digest */ - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; /* - u32x a = tmps[gid].out[0]; - u32x b = tmps[gid].out[1]; - u32x c = tmps[gid].out[2]; - u32x d = tmps[gid].out[3]; - u32x e = tmps[gid].out[4]; - u32x f = tmps[gid].out[5]; - u32x g = tmps[gid].out[6]; - u32x h = tmps[gid].out[7] & 0xffff03ff; + u32 a = tmps[gid].out[0]; + u32 b = tmps[gid].out[1]; + u32 c = tmps[gid].out[2]; + u32 d = tmps[gid].out[3]; + u32 e = tmps[gid].out[4]; + u32 f = tmps[gid].out[5]; + u32 g = tmps[gid].out[6]; + u32 h = tmps[gid].out[7] & 0xffff03ff; */ #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06500.cl b/OpenCL/m06500.cl similarity index 89% rename from amd/m06500.cl rename to 
OpenCL/m06500.cl index d0dad00..1917164 100644 --- a/amd/m06500.cl +++ b/OpenCL/m06500.cl @@ -8,37 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u64 k_sha512[80] = @@ -65,33 +53,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) +static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; + + u64 w0_t = w0[0]; + u64 w1_t = w0[1]; + u64 w2_t = w0[2]; + u64 w3_t 
= w0[3]; + u64 w4_t = w1[0]; + u64 w5_t = w1[1]; + u64 w6_t = w1[2]; + u64 w7_t = w1[3]; + u64 w8_t = w2[0]; + u64 w9_t = w2[1]; + u64 wa_t = w2[2]; + u64 wb_t = w2[3]; + u64 wc_t = w3[0]; + u64 wd_t = w3[1]; + u64 we_t = w3[2]; + u64 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -151,7 +139,7 @@ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[ digest[7] += h; } -static void hmac_sha512_pad (u64x w0[4], u64x w1[4], u64x w2[4], u64x w3[4], u64x ipad[8], u64x opad[8]) +static void hmac_sha512_pad (u64 w0[4], u64 w1[4], u64 w2[4], u64 w3[4], u64 ipad[8], u64 opad[8]) { w0[0] = w0[0] ^ 0x3636363636363636; w0[1] = w0[1] ^ 0x3636363636363636; @@ -210,7 +198,7 @@ static void hmac_sha512_pad (u64x w0[4], u64x w1[4], u64x w2[4], u64x w3[4], u64 sha512_transform (w0, w1, w2, w3, opad); } -static void hmac_sha512_run (u64x w0[4], u64x w1[4], u64x w2[4], u64x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) +static void hmac_sha512_run (u64 w0[4], u64 w1[4], u64 w2[4], u64 w3[4], u64 ipad[8], u64 opad[8], u64 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -262,28 +250,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -349,10 +337,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u64x w0l[4]; - u64x w1l[4]; - u64x w2l[4]; - u64x w3l[4]; + u64 w0l[4]; + u64 w1l[4]; + u64 w2l[4]; + u64 w3l[4]; w0l[0] = (u64x) (w0[0]) << 32 | 
(u64x) (w0[1]); w0l[1] = (u64x) (w0[2]) << 32 | (u64x) (w0[3]); @@ -371,8 +359,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl w3l[2] = 0; w3l[3] = 0; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; hmac_sha512_pad (w0l, w1l, w2l, w3l, ipad, opad); @@ -428,7 +416,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_init (__gl w3l[2] = 0; w3l[3] = (128 + salt_len + 4) * 8; - u64x dgst[8]; + u64 dgst[8]; hmac_sha512_run (w0l, w1l, w2l, w3l, ipad, opad, dgst); @@ -457,8 +445,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_loop (__gl if (gid >= gid_max) return; - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -478,8 +466,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_loop (__gl opad[6] = tmps[gid].opad[6]; opad[7] = tmps[gid].opad[7]; - u64x dgst[8]; - u64x out[8]; + u64 dgst[8]; + u64 out[8]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -501,10 +489,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u64x w0[4]; - u64x w1[4]; - u64x w2[4]; - u64x w3[4]; + u64 w0[4]; + u64 w1[4]; + u64 w2[4]; + u64 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -571,22 +559,22 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06500_comp (__gl */ /* - u64x a = tmps[gid].out[0]; - u64x b = tmps[gid].out[1]; - u64x c = tmps[gid].out[2]; - u64x d = tmps[gid].out[3]; - u64x e = tmps[gid].out[4]; - u64x f = tmps[gid].out[5]; - u64x g = tmps[gid].out[6]; - u64x h = tmps[gid].out[7] & 0xffffffffffffff00; + u64 a = tmps[gid].out[0]; + u64 b = tmps[gid].out[1]; + u64 c = tmps[gid].out[2]; + u64 d = tmps[gid].out[3]; + u64 e = tmps[gid].out[4]; + u64 f = tmps[gid].out[5]; + u64 g = tmps[gid].out[6]; + u64 h = tmps[gid].out[7] & 0xffffffffffffff00; */ - const u32x r0 = l32_from_64 
(tmps[gid].out[0]); - const u32x r1 = h32_from_64 (tmps[gid].out[0]); - const u32x r2 = l32_from_64 (tmps[gid].out[1]); - const u32x r3 = h32_from_64 (tmps[gid].out[1]); + const u32 r0 = l32_from_64 (tmps[gid].out[0]); + const u32 r1 = h32_from_64 (tmps[gid].out[0]); + const u32 r2 = l32_from_64 (tmps[gid].out[1]); + const u32 r3 = h32_from_64 (tmps[gid].out[1]); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06600.cl b/OpenCL/m06600.cl similarity index 97% rename from amd/m06600.cl rename to OpenCL/m06600.cl index d3fe70d..6518a20 100644 --- a/amd/m06600.cl +++ b/OpenCL/m06600.cl @@ -8,33 +8,21 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif __constant u32 te0[256] = @@ -865,30 +853,30 @@ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 ^ rdk[43]; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t 
= w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -993,7 +981,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -1046,7 +1034,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -1092,28 +1080,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1152,8 +1140,8 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m06600_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -1206,7 +1194,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[5]; + u32 dgst[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -1229,8 +1217,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -1244,8 +1232,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_loop (__gl opad[3] = tmps[gid].opad[3]; opad[4] = tmps[gid].opad[4]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -1261,10 +1249,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -1410,17 +1398,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_comp (__gl * aes init */ - u32x ukeyx[4]; + u32 ukeyx[4]; ukeyx[0] = tmps[gid].out[0]; ukeyx[1] = tmps[gid].out[1]; ukeyx[2] = tmps[gid].out[2]; ukeyx[3] = tmps[gid].out[3]; - u32x a; - u32x b; - u32x c; - u32x d; + u32 a; + u32 b; + u32 c; + u32 d; #define KEYLEN 44 @@ -1454,12 +1442,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06600_comp (__gl c = out[2] ^ iv[2]; d = out[3] ^ iv[3]; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; #define il_pos 0 - #include VECT_COMPARE_M + 
#include COMPARE_M } diff --git a/amd/m06700.cl b/OpenCL/m06700.cl similarity index 91% rename from amd/m06700.cl rename to OpenCL/m06700.cl index 40ed05a..1b1953a 100644 --- a/amd/m06700.cl +++ b/OpenCL/m06700.cl @@ -8,63 +8,51 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 
wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -169,7 +157,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -222,7 +210,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -268,28 +256,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -355,8 +343,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -406,7 +394,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[5]; + u32 dgst[5]; hmac_sha1_run 
(w0, w1, w2, w3, ipad, opad, dgst); @@ -429,8 +417,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -444,8 +432,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_loop (__gl opad[3] = tmps[gid].opad[3]; opad[4] = tmps[gid].opad[4]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -461,10 +449,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -522,19 +510,19 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06700_comp (__gl */ /* - u32x a = tmps[gid].out[0]; - u32x b = tmps[gid].out[1]; - u32x c = tmps[gid].out[2]; - u32x d = tmps[gid].out[3]; - u32x e = tmps[gid].out[4] & 0xffff03ff; + u32 a = tmps[gid].out[0]; + u32 b = tmps[gid].out[1]; + u32 c = tmps[gid].out[2]; + u32 d = tmps[gid].out[3]; + u32 e = tmps[gid].out[4] & 0xffff03ff; */ - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06800.cl b/OpenCL/m06800.cl similarity index 97% rename from amd/m06800.cl rename to OpenCL/m06800.cl index 7f74bcd..c425485 100644 --- a/amd/m06800.cl +++ b/OpenCL/m06800.cl @@ -8,33 +8,21 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 
-#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif __constant u32 te0[256] = @@ -1020,33 +1008,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -1106,7 +1094,7 @@ static void sha256_transform (const u32x w0[4], const 
u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -1165,7 +1153,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -1217,28 +1205,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1284,8 +1272,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); @@ -1344,7 +1332,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[8]; + u32 dgst[8]; hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -1373,8 +1361,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_loop (__gl if (gid >= gid_max) return; - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + 
u32 opad[8]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -1394,8 +1382,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_loop (__gl opad[6] = tmps[gid].opad[6]; opad[7] = tmps[gid].opad[7]; - u32x dgst[8]; - u32x out[8]; + u32 dgst[8]; + u32 out[8]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -1417,10 +1405,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -1620,7 +1608,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_comp (__gl && (out[2] == salt_buf[2]) && (out[3] == salt_buf[3])) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); + mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0); d_return_buf[lid] = 1; } @@ -1639,12 +1627,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06800_comp (__gl AES256_encrypt (lastpass_magic, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - const u32x r0 = out[DGST_R0]; - const u32x r1 = out[DGST_R1]; - const u32x r2 = out[DGST_R2]; - const u32x r3 = out[DGST_R3]; + const u32 r0 = out[DGST_R0]; + const u32 r1 = out[DGST_R1]; + const u32 r2 = out[DGST_R2]; + const u32 r3 = out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m06900_a0.cl b/OpenCL/m06900_a0.cl similarity index 96% rename from amd/m06900_a0.cl rename to OpenCL/m06900_a0.cl index 508bf1c..1dd627d 100644 --- a/amd/m06900_a0.cl +++ b/OpenCL/m06900_a0.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" 
-#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 c_tables[4][256] = { @@ -319,7 +300,7 @@ __constant u32 c_tables[4][256] = #define round(k1,k2,tbl) \ { \ - u32x t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -334,8 +315,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32x r; \ - u32x l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ @@ -404,8 +385,8 @@ __constant u32 c_tables[4][256] = #define A(x) \ { \ - u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0] ^ x[2]; \ r = x[1] ^ x[3]; \ x[0] = x[2]; \ @@ -420,8 +401,8 @@ __constant u32 c_tables[4][256] = #define AA(x) \ { \ - u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0]; \ r = x[2]; \ x[0] = x[4]; \ @@ -679,8 +660,8 @@ __constant u32 c_tables[4][256] = #define PASS0(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 0, t); \ @@ -690,8 +671,8 @@ __constant u32 c_tables[4][256] = #define PASS2(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 2, t); \ @@ -702,8 +683,8 @@ __constant u32 c_tables[4][256] = #define PASS4(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 4, t); \ @@ -713,8 +694,8 @@ __constant u32 c_tables[4][256] = #define PASS6(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, 
w); \ R (k, h, s, 6, t); \ @@ -734,14 +715,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -788,28 +769,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -820,7 +801,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo u32 w14 = out_len * 8; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -831,7 +812,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -850,8 +831,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -873,7 +854,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -962,12 +943,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 
= state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -993,14 +974,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -1059,28 +1040,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1091,7 +1072,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo u32 w14 = out_len * 8; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -1102,7 +1083,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -1121,8 +1102,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -1144,7 +1125,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -1233,12 +1214,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06900_a1.cl b/OpenCL/m06900_a1.cl similarity index 96% rename from amd/m06900_a1.cl rename to OpenCL/m06900_a1.cl index c8bfe6b..6d252bb 100644 --- a/amd/m06900_a1.cl +++ b/OpenCL/m06900_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 c_tables[4][256] = { @@ -317,7 +298,7 @@ __constant u32 c_tables[4][256] = #define round(k1,k2,tbl) \ { \ - u32x t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -332,8 +313,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32x r; \ - u32x l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ @@ -402,8 +383,8 @@ __constant u32 c_tables[4][256] = #define A(x) \ { \ - u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0] ^ x[2]; \ r = x[1] ^ x[3]; \ x[0] = x[2]; \ @@ -418,8 +399,8 @@ __constant u32 c_tables[4][256] = #define AA(x) \ { \ - 
u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0]; \ r = x[2]; \ x[0] = x[4]; \ @@ -677,8 +658,8 @@ __constant u32 c_tables[4][256] = #define PASS0(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 0, t); \ @@ -688,8 +669,8 @@ __constant u32 c_tables[4][256] = #define PASS2(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 2, t); \ @@ -700,8 +681,8 @@ __constant u32 c_tables[4][256] = #define PASS4(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 4, t); \ @@ -711,8 +692,8 @@ __constant u32 c_tables[4][256] = #define PASS6(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 6, t); \ @@ -732,28 +713,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -842,28 +823,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | 
wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -872,7 +853,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo const u32 w14 = pw_len * 8; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -883,7 +864,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -902,8 +883,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -925,7 +906,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -1014,12 +995,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -1045,28 +1026,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; 
wordl3[0] = 0; wordl3[1] = 0; @@ -1167,28 +1148,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -1197,7 +1178,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo const u32 w14 = pw_len * 8; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -1208,7 +1189,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -1227,8 +1208,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -1250,7 +1231,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -1339,12 +1320,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include 
VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m06900_a3.cl b/OpenCL/m06900_a3.cl similarity index 93% rename from amd/m06900_a3.cl rename to OpenCL/m06900_a3.cl index 6166900..2d6bfbc 100644 --- a/amd/m06900_a3.cl +++ b/OpenCL/m06900_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 c_tables[4][256] = { @@ -317,7 +298,7 @@ __constant u32 c_tables[4][256] = #define round(k1,k2,tbl) \ { \ - u32x t; \ + u32 t; \ t = (k1) + r; \ l ^= BOX (amd_bfe (t, 0, 8), 0, tbl) ^ \ BOX (amd_bfe (t, 8, 8), 1, tbl) ^ \ @@ -332,8 +313,8 @@ __constant u32 c_tables[4][256] = #define R(k,h,s,i,t) \ { \ - u32x r; \ - u32x l; \ + u32 r; \ + u32 l; \ r = h[i + 0]; \ l = h[i + 1]; \ round (k[0], k[1], t); \ @@ -402,8 +383,8 @@ __constant u32 c_tables[4][256] = #define A(x) \ { \ - u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0] ^ x[2]; \ r = x[1] ^ x[3]; \ x[0] = x[2]; \ @@ -418,8 +399,8 @@ __constant u32 c_tables[4][256] = #define AA(x) \ { \ - u32x l; \ - u32x r; \ + u32 l; \ + u32 r; \ l = x[0]; \ r = x[2]; \ x[0] = x[4]; \ @@ -677,8 +658,8 @@ __constant u32 c_tables[4][256] = #define PASS0(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 0, t); \ @@ -688,8 +669,8 @@ __constant u32 
c_tables[4][256] = #define PASS2(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 2, t); \ @@ -700,8 +681,8 @@ __constant u32 c_tables[4][256] = #define PASS4(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 4, t); \ @@ -711,14 +692,14 @@ __constant u32 c_tables[4][256] = #define PASS6(h,s,u,v,t) \ { \ - u32x k[8]; \ - u32x w[8]; \ + u32 k[8]; \ + u32 w[8]; \ X (w, u, v); \ P (k, w); \ R (k, h, s, 6, t); \ } -static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256]) +static void m06900m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256]) { /** * modifier @@ -737,7 +718,7 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -745,7 +726,7 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -756,7 +737,7 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -775,8 +756,8 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -798,7 +779,7 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -887,15 +868,15 @@ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256]) +static void m06900s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 s_tables[4][256]) { /** * modifier @@ -926,7 +907,7 @@ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -934,7 +915,7 @@ static void m06900s (u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x data[8]; + u32 data[8]; data[0] = w0[0]; data[1] = w0[1]; @@ -945,7 +926,7 @@ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p data[6] = w1[2]; data[7] = w1[3]; - u32x state[16]; + u32 state[16]; state[ 0] = 0; state[ 1] = 0; @@ -964,8 +945,8 @@ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p state[14] = data[6]; state[15] = data[7]; - u32x state_m[8]; - u32x data_m[8]; + u32 state_m[8]; + u32 data_m[8]; /* gost1 */ @@ -987,7 +968,7 @@ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p data_m[6] = data[6]; data_m[7] = data[7]; - u32x tmp[8]; + u32 tmp[8]; PASS0 (state, tmp, state_m, data_m, s_tables); PASS2 (state, tmp, state_m, data_m, s_tables); @@ -1076,12 +1057,12 @@ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p /* store */ - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -1099,28 +1080,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1182,28 +1163,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 
5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1269,28 +1250,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1352,28 +1333,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m06900_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m07100.cl b/OpenCL/m07100.cl similarity index 97% rename from amd/m07100.cl rename to OpenCL/m07100.cl index 25e108f..f00223e 100644 --- a/amd/m07100.cl +++ b/OpenCL/m07100.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u64 k_sha512[80] = @@ -256,28 +244,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07100_init (__gl 
if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = swap_workaround (pws[gid].i[ 0]); w0[1] = swap_workaround (pws[gid].i[ 1]); w0[2] = swap_workaround (pws[gid].i[ 2]); w0[3] = swap_workaround (pws[gid].i[ 3]); - u32x w1[4]; + u32 w1[4]; w1[0] = swap_workaround (pws[gid].i[ 4]); w1[1] = swap_workaround (pws[gid].i[ 5]); w1[2] = swap_workaround (pws[gid].i[ 6]); w1[3] = swap_workaround (pws[gid].i[ 7]); - u32x w2[4]; + u32 w2[4]; w2[0] = swap_workaround (pws[gid].i[ 8]); w2[1] = swap_workaround (pws[gid].i[ 9]); w2[2] = swap_workaround (pws[gid].i[10]); w2[3] = swap_workaround (pws[gid].i[11]); - u32x w3[4]; + u32 w3[4]; w3[0] = swap_workaround (pws[gid].i[12]); w3[1] = swap_workaround (pws[gid].i[13]); @@ -494,15 +482,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07100_comp (__gl const u32 lid = get_local_id (0); - const u64x a = tmps[gid].out[0]; - const u64x b = tmps[gid].out[1]; + const u64 a = tmps[gid].out[0]; + const u64 b = tmps[gid].out[1]; - const u32x r0 = l32_from_64 (a); - const u32x r1 = h32_from_64 (a); - const u32x r2 = l32_from_64 (b); - const u32x r3 = h32_from_64 (b); + const u32 r0 = l32_from_64 (a); + const u32 r1 = h32_from_64 (a); + const u32 r2 = l32_from_64 (b); + const u32 r3 = h32_from_64 (b); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m07300_a0.cl b/OpenCL/m07300_a0.cl similarity index 91% rename from amd/m07300_a0.cl rename to OpenCL/m07300_a0.cl index a4dd484..466c10c 100644 --- a/amd/m07300_a0.cl +++ b/OpenCL/m07300_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" 
-#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -174,7 +150,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 
0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -227,7 +203,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -279,14 +255,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -307,28 +283,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -341,36 +317,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + 
u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -418,16 +394,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -455,14 +431,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -495,28 +471,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -529,36 +505,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - 
u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -606,16 +582,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07300_a1.cl b/OpenCL/m07300_a1.cl similarity index 91% rename from amd/m07300_a1.cl rename to OpenCL/m07300_a1.cl index f7a6501..6242f83 100644 --- a/amd/m07300_a1.cl +++ b/OpenCL/m07300_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], 
const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -277,28 +253,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] 
= pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -361,28 +337,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -393,36 +369,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -470,16 +446,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo w3_t[2] = 0; w3_t[3] = (64 + esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; 
hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -507,28 +483,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -603,28 +579,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -635,36 +611,36 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = 
swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -712,16 +688,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s04 (__glo w3_t[2] = 0; w3_t[3] = (64 + esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07300_a3.cl b/OpenCL/m07300_a3.cl similarity index 86% rename from amd/m07300_a3.cl rename to OpenCL/m07300_a3.cl index 6b408b2..2356f1e 100644 --- a/amd/m07300_a3.cl +++ b/OpenCL/m07300_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M 
"check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -225,7 +201,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -261,7 +237,7 @@ static void hmac_sha1_run 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, digest); } -static void m07300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global rakp_t *rakp_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m07300m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global rakp_t *rakp_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -276,7 +252,7 @@ static void m07300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 
p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -288,36 +264,36 @@ static void m07300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -365,20 +341,20 @@ static void m07300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m07300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global rakp_t *rakp_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m07300s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global rakp_t *rakp_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -405,7 +381,7 @@ static void m07300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -417,36 +393,36 @@ static void m07300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * pads */ - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -494,16 +470,16 @@ static void m07300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = (64 + 
esalt_size) * 8; - u32x digest[5]; + u32 digest[5]; hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -517,28 +493,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -564,28 +540,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -611,28 +587,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -658,28 +634,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m07300_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -705,28 +681,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -752,28 +728,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07300_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m07400.cl b/OpenCL/m07400.cl similarity index 92% rename from amd/m07400.cl rename to OpenCL/m07400.cl index 9a338a1..ece0688 100644 --- a/amd/m07400.cl +++ b/OpenCL/m07400.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" 
+#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u32 k_sha256[64] = @@ -53,33 +41,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w[16], u32x digest[8]) +static void sha256_transform (const u32 w[16], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w[ 0]); - u32x w1_t = swap_workaround (w[ 1]); - u32x w2_t = swap_workaround (w[ 2]); - u32x w3_t = swap_workaround (w[ 3]); - u32x w4_t = swap_workaround (w[ 4]); - u32x w5_t = swap_workaround (w[ 5]); - u32x w6_t = swap_workaround (w[ 6]); - u32x w7_t = swap_workaround (w[ 7]); - u32x w8_t = swap_workaround (w[ 8]); - u32x w9_t = swap_workaround (w[ 9]); - u32x wa_t = swap_workaround (w[10]); - u32x wb_t = swap_workaround (w[11]); - u32x wc_t = swap_workaround (w[12]); - u32x wd_t = swap_workaround (w[13]); - u32x we_t = swap_workaround (w[14]); - u32x wf_t = swap_workaround (w[15]); + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = swap_workaround (w[ 0]); + u32 w1_t = swap_workaround (w[ 1]); + u32 w2_t = swap_workaround (w[ 2]); + u32 w3_t = swap_workaround (w[ 3]); + u32 w4_t = swap_workaround (w[ 4]); + u32 w5_t = swap_workaround (w[ 5]); + u32 w6_t = swap_workaround (w[ 6]); + u32 w7_t = swap_workaround (w[ 7]); + u32 w8_t = swap_workaround (w[ 8]); + u32 w9_t = swap_workaround (w[ 9]); + u32 wa_t = swap_workaround (w[10]); + u32 wb_t = swap_workaround (w[11]); + u32 wc_t = swap_workaround (w[12]); + u32 wd_t = swap_workaround (w[13]); + u32 we_t = swap_workaround (w[14]); + u32 wf_t = swap_workaround 
(w[15]); #define ROUND_EXPAND() \ { \ @@ -139,9 +127,9 @@ static void sha256_transform (const u32x w[16], u32x digest[8]) digest[7] += h; } -static void sha256_transform_no14 (const u32x w[16], u32x digest[8]) +static void sha256_transform_no14 (const u32 w[16], u32 digest[8]) { - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w[ 0]; w_t[ 1] = w[ 1]; @@ -163,7 +151,7 @@ static void sha256_transform_no14 (const u32x w[16], u32x digest[8]) sha256_transform (w_t, digest); } -static void init_ctx (u32x digest[8]) +static void init_ctx (u32 digest[8]) { digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -175,7 +163,7 @@ static void init_ctx (u32x digest[8]) digest[7] = SHA256M_H; } -static void bzero16 (u32x block[16]) +static void bzero16 (u32 block[16]) { block[ 0] = 0; block[ 1] = 0; @@ -195,7 +183,7 @@ static void bzero16 (u32x block[16]) block[15] = 0; } -static void bswap8 (u32x block[16]) +static void bswap8 (u32 block[16]) { block[ 0] = swap_workaround (block[ 0]); block[ 1] = swap_workaround (block[ 1]); @@ -207,16 +195,16 @@ static void bswap8 (u32x block[16]) block[ 7] = swap_workaround (block[ 7]); } -static u32 memcat16 (u32x block[16], const u32 block_len, const u32x append[4], const u32 append_len) +static u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; + u32 tmp0; + u32 tmp1; + u32 tmp2; + u32 tmp3; + u32 tmp4; const int offset_minus_4 = 4 - block_len; @@ -330,16 +318,16 @@ static u32 memcat16 (u32x block[16], const u32 block_len, const u32x append[4], return new_len; } -static u32 memcat16c (u32x block[16], const u32 block_len, const u32x append[4], const u32 append_len, u32x digest[8]) +static u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len, u32 digest[8]) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - 
u32x tmp2; - u32x tmp3; - u32x tmp4; + u32 tmp0; + u32 tmp1; + u32 tmp2; + u32 tmp3; + u32 tmp4; const int offset_minus_4 = 4 - block_len; @@ -358,7 +346,7 @@ static u32 memcat16c (u32x block[16], const u32 block_len, const u32x append[4], tmp4 = 0; } - u32x carry[4] = { 0, 0, 0, 0 }; + u32 carry[4] = { 0, 0, 0, 0 }; switch (div) { @@ -479,16 +467,16 @@ static u32 memcat16c (u32x block[16], const u32 block_len, const u32x append[4], return new_len; } -static u32 memcat20 (u32x block[20], const u32 block_len, const u32x append[4], const u32 append_len) +static u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; + u32 tmp0; + u32 tmp1; + u32 tmp2; + u32 tmp3; + u32 tmp4; const int offset_minus_4 = 4 - block_len; @@ -610,16 +598,16 @@ static u32 memcat20 (u32x block[20], const u32 block_len, const u32x append[4], return block_len + append_len; } -static u32 memcat20_x80 (u32x block[20], const u32 block_len, const u32x append[4], const u32 append_len) +static u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; + u32 tmp0; + u32 tmp1; + u32 tmp2; + u32 tmp3; + u32 tmp4; const int offset_minus_4 = 4 - block_len; @@ -751,7 +739,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -780,11 +768,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_init (__gl u32 block_len; // never reaches > 64 u32 transform_len; // required for w[15] = len * 8 - u32x block[16]; + u32 block[16]; - u32x alt_result[8]; - u32x p_bytes[8]; - u32x s_bytes[8]; + u32 alt_result[8]; + u32 p_bytes[8]; + 
u32 s_bytes[8]; /* Prepare for the real work. */ @@ -818,7 +806,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_init (__gl bzero16 (block); - u32x alt_result_tmp[8]; + u32 alt_result_tmp[8]; alt_result_tmp[0] = alt_result[0]; alt_result_tmp[1] = alt_result[1]; @@ -1006,14 +994,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_loop (__gl * base */ - u32x p_bytes[4]; + u32 p_bytes[4]; p_bytes[0] = tmps[gid].p_bytes[0]; p_bytes[1] = tmps[gid].p_bytes[1]; p_bytes[2] = tmps[gid].p_bytes[2]; p_bytes[3] = tmps[gid].p_bytes[3]; - u32x p_bytes_x80[4]; + u32 p_bytes_x80[4]; p_bytes_x80[0] = tmps[gid].p_bytes[0]; p_bytes_x80[1] = tmps[gid].p_bytes[1]; @@ -1022,14 +1010,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_loop (__gl append_0x80_1 (p_bytes_x80, pw_len); - u32x s_bytes[4]; + u32 s_bytes[4]; s_bytes[0] = tmps[gid].s_bytes[0]; s_bytes[1] = tmps[gid].s_bytes[1]; s_bytes[2] = tmps[gid].s_bytes[2]; s_bytes[3] = tmps[gid].s_bytes[3]; - u32x alt_result[8]; + u32 alt_result[8]; alt_result[0] = tmps[gid].alt_result[0]; alt_result[1] = tmps[gid].alt_result[1]; @@ -1047,11 +1035,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_loop (__gl for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) { - u32x tmp[8]; + u32 tmp[8]; init_ctx (tmp); - u32x block[32]; + u32 block[32]; bzero16 (&block[ 0]); bzero16 (&block[16]); @@ -1165,12 +1153,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07400_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].alt_result[0]; - const u32x r1 = tmps[gid].alt_result[1]; - const u32x r2 = tmps[gid].alt_result[2]; - const u32x r3 = tmps[gid].alt_result[3]; + const u32 r0 = tmps[gid].alt_result[0]; + const u32 r1 = tmps[gid].alt_result[1]; + const u32 r2 = tmps[gid].alt_result[2]; + const u32 r3 = tmps[gid].alt_result[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git 
a/amd/m07500_a0.cl b/OpenCL/m07500_a0.cl similarity index 93% rename from amd/m07500_a0.cl rename to OpenCL/m07500_a0.cl index 102c98c..6525505 100644 --- a/amd/m07500_a0.cl +++ b/OpenCL/m07500_a0.cl @@ -8,28 +8,16 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" typedef struct { @@ -186,12 +174,12 @@ static int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], u32 timesta return 1; } -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -250,29 +238,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - 
u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -348,7 +336,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -399,7 +387,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -433,16 +421,16 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, const u32 checksum[4], u32x digest[4]) +static void kerb_prepare (const u32 w0[4], const u32 w1[4], const u32 pw_len, const u32 checksum[4], u32 digest[4]) { /** * pads */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -497,8 +485,8 @@ static void 
kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -579,14 +567,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo * base */ - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -623,28 +611,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -657,7 +645,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo * kerberos */ - u32x digest[4]; + u32 digest[4]; kerb_prepare (w0, w1, out_len, checksum, digest); @@ -672,7 +660,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); + mark_hash (plains_buf, hashes_shown, digests_offset, gid, il_pos); d_return_buf[lid] = 1; } @@ -706,14 +694,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo * base */ - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -750,28 +738,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -784,7 +772,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo * kerberos */ - u32x digest[4]; + u32 digest[4]; kerb_prepare (w0, w1, out_len, checksum, digest); @@ -799,7 +787,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); + mark_hash (plains_buf, hashes_shown, digests_offset, gid, il_pos); d_return_buf[lid] = 1; } diff --git a/amd/m07500_a1.cl b/OpenCL/m07500_a1.cl similarity index 93% rename from amd/m07500_a1.cl rename to OpenCL/m07500_a1.cl index ebbfb74..55679f8 100644 --- a/amd/m07500_a1.cl +++ b/OpenCL/m07500_a1.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" typedef struct { @@ -184,12 +172,12 @@ static int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], u32 timesta return 1; } -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = 
digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -248,29 +236,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -346,7 +334,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -397,7 +385,7 @@ static void hmac_md5_pad (u32x w0[4], u32x 
w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -431,16 +419,16 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, const u32 checksum[4], u32x digest[4]) +static void kerb_prepare (const u32 w0[4], const u32 w1[4], const u32 pw_len, const u32 checksum[4], u32 digest[4]) { /** * pads */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -495,8 +483,8 @@ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -577,28 +565,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo * base */ - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -677,28 +665,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | 
wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -709,7 +697,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo * kerberos */ - u32x digest[4]; + u32 digest[4]; kerb_prepare (w0, w1, pw_len, checksum, digest); @@ -724,7 +712,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); + mark_hash (plains_buf, hashes_shown, digests_offset, gid, il_pos); d_return_buf[lid] = 1; } @@ -758,28 +746,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo * base */ - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -858,28 +846,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | 
wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -890,7 +878,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo * kerberos */ - u32x digest[4]; + u32 digest[4]; kerb_prepare (w0, w1, pw_len, checksum, digest); @@ -905,7 +893,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); + mark_hash (plains_buf, hashes_shown, digests_offset, gid, il_pos); d_return_buf[lid] = 1; } diff --git a/amd/m07500_a3.cl b/OpenCL/m07500_a3.cl similarity index 90% rename from amd/m07500_a3.cl rename to OpenCL/m07500_a3.cl index fc2c83d..7cd4cdb 100644 --- a/amd/m07500_a3.cl +++ b/OpenCL/m07500_a3.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" typedef struct { @@ -184,12 +172,12 @@ static int decrypt_and_check (__local RC4_KEY *rc4_key, u32 data[4], u32 timesta return 1; } -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, 
MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -248,29 +236,29 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -346,7 +334,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -397,7 +385,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static 
void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -431,16 +419,16 @@ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, digest); } -static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, const u32 checksum[4], u32x digest[4]) +static void kerb_prepare (const u32 w0[4], const u32 w1[4], const u32 pw_len, const u32 checksum[4], u32 digest[4]) { /** * pads */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -495,8 +483,8 @@ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, w3_t[2] = 0; w3_t[3] = 0; - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); @@ -560,7 +548,7 @@ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -static void m07500 (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset) +static void m07500 (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -595,7 +583,7 @@ static void m07500 (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -603,7 +591,7 @@ static void m07500 (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w w0[0] = w0l | w0r; - u32x digest[4]; + u32 digest[4]; kerb_prepare (w0, w1, pw_len, checksum, digest); @@ -618,7 +606,7 @@ static void m07500 (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); + mark_hash (plains_buf, hashes_shown, digests_offset, gid, il_pos); d_return_buf[lid] = 1; } @@ -638,28 +626,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; 
w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -688,28 +676,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -742,28 +730,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -792,28 +780,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07500_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m07600_a0.cl b/OpenCL/m07600_a0.cl similarity index 95% rename from amd/m07600_a0.cl rename to OpenCL/m07600_a0.cl index 4e63f0c..2940fdf 100644 --- a/amd/m07600_a0.cl +++ b/OpenCL/m07600_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 
-#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -165,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -201,28 +177,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x 
w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -330,7 +306,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -341,7 +317,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -352,7 +328,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x 
w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -507,11 +483,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -634,12 +610,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo d += r_d; e += r_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -665,14 +641,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -776,28 +752,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -812,28 +788,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = 
swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -941,7 +917,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -952,7 +928,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -963,7 +939,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | 
uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -1118,11 +1094,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -1245,12 +1221,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo d += r_d; e += r_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07600_a1.cl b/OpenCL/m07600_a1.cl similarity index 95% rename from amd/m07600_a1.cl rename to OpenCL/m07600_a1.cl index 376c05e..b74fec3 100644 --- a/amd/m07600_a1.cl +++ b/OpenCL/m07600_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo const u32 gid = get_global_id 
(0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -223,28 +199,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -255,28 +231,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = 
swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -384,7 +360,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -395,7 +371,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -406,7 +382,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -561,11 +537,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -688,12 +664,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo d += r_d; e += r_e; 
- const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -719,28 +695,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -890,28 +866,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -922,28 +898,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = 
swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -1051,7 +1027,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -1062,7 +1038,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -1073,7 +1049,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -1228,11 +1204,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m07600_s04 (__glo d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -1355,12 +1331,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo d += r_d; e += r_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07600_a3.cl b/OpenCL/m07600_a3.cl similarity index 94% rename from amd/m07600_a3.cl rename to OpenCL/m07600_a3.cl index e198455..a8d6ecf 100644 --- a/amd/m07600_a3.cl +++ b/OpenCL/m07600_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m07600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -103,7 +79,7 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -115,28 +91,28 @@ static void m07600m (u32x w0[4], u32x 
w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -244,7 +220,7 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -255,7 +231,7 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -266,7 +242,7 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -421,11 +397,11 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c 
= c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -548,16 +524,16 @@ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += r_d; e += r_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m07600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, 
__global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -616,7 +592,7 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -628,28 +604,28 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -757,7 +733,7 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * Prepend salt */ - u32x w0t[4]; + u32 w0t[4]; w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 | uint_to_hex_lower8 ((a >> 16) & 255) << 16; @@ -768,7 +744,7 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - u32x w1t[4]; + u32 w1t[4]; 
w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 | uint_to_hex_lower8 ((c >> 16) & 255) << 16; @@ -779,7 +755,7 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - u32x w2t[2]; + u32 w2t[2]; w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 | uint_to_hex_lower8 ((e >> 16) & 255) << 16; @@ -934,11 +910,11 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += SHA1M_D; e += SHA1M_E; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; + u32 r_e = e; // 2nd transform @@ -1061,12 +1037,12 @@ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d += r_d; e += r_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -1085,28 +1061,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1171,28 +1147,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; 
w3[0] = 0; w3[1] = 0; @@ -1257,28 +1233,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1343,28 +1319,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1429,28 +1405,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1515,28 +1491,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07600_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; 
w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m07700_a0.cl b/OpenCL/m07700_a0.cl similarity index 95% rename from amd/m07700_a0.cl rename to OpenCL/m07700_a0.cl index de695d1..9d00474 100644 --- a/amd/m07700_a0.cl +++ b/OpenCL/m07700_a0.cl @@ -8,33 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) @@ -76,9 +62,9 @@ __constant u32 bcodeArray[48] = 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d }; -static u32x sapb_trans (const u32x in) +static u32 sapb_trans (const u32 in) { - u32x out = 0; + u32 out = 0; #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; @@ -90,7 +76,7 @@ static u32x sapb_trans (const u32x in) return out; } -static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) +static u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) { t[ 0] = 0; t[ 1] 
= 0; @@ -250,14 +236,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = 0; pw_buf0[3] = 0; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = 0; pw_buf1[1] = 0; @@ -289,28 +275,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -360,7 +346,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo const u32 pw_salt_len = out_len + salt_len; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | w0[0]; t[ 1] = s0[1] | w0[1]; @@ -385,10 +371,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -463,7 +449,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -550,12 +536,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + 
#include COMPARE_M } } @@ -583,14 +569,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = 0; pw_buf0[3] = 0; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = 0; pw_buf1[1] = 0; @@ -634,28 +620,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -705,7 +691,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo const u32 pw_salt_len = out_len + salt_len; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | w0[0]; t[ 1] = s0[1] | w0[1]; @@ -730,10 +716,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -808,7 +794,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -895,12 +881,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include 
VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07700_a1.cl b/OpenCL/m07700_a1.cl similarity index 96% rename from amd/m07700_a1.cl rename to OpenCL/m07700_a1.cl index 222781f..399409b 100644 --- a/amd/m07700_a1.cl +++ b/OpenCL/m07700_a1.cl @@ -8,31 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) @@ -74,9 +60,9 @@ __constant u32 bcodeArray[48] = 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d }; -static u32x sapb_trans (const u32x in) +static u32 sapb_trans (const u32 in) { - u32x out = 0; + u32 out = 0; #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; @@ -88,7 +74,7 @@ static u32x sapb_trans (const u32x in) return out; } -static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) +static u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) { t[ 0] = 0; t[ 1] = 0; @@ -248,28 +234,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; 
wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -347,7 +333,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = sapb_trans (wordl0[0] | wordr0[0]); w0[1] = sapb_trans (wordl0[1] | wordr0[1]); @@ -390,7 +376,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | w0[0]; t[ 1] = s0[1] | w0[1]; @@ -415,10 +401,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -493,7 +479,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -580,12 +566,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -613,28 +599,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo if (gid >= gid_max) 
return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -720,7 +706,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = sapb_trans (wordl0[0] | wordr0[0]); w0[1] = sapb_trans (wordl0[1] | wordr0[1]); @@ -763,7 +749,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | w0[0]; t[ 1] = s0[1] | w0[1]; @@ -788,10 +774,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -866,7 +852,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -953,12 +939,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07700_a3.cl b/OpenCL/m07700_a3.cl similarity index 91% rename 
from amd/m07700_a3.cl rename to OpenCL/m07700_a3.cl index b7cb02d..3dc8268 100644 --- a/amd/m07700_a3.cl +++ b/OpenCL/m07700_a3.cl @@ -8,31 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) @@ -74,9 +60,9 @@ __constant u32 bcodeArray[48] = 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d }; -static u32x sapb_trans (const u32x in) +static u32 sapb_trans (const u32 in) { - u32x out = 0; + u32 out = 0; #ifdef VECT_SIZE1 out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; @@ -88,7 +74,7 @@ static u32x sapb_trans (const u32x in) return out; } -static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) +static u32 walld0rf_magic (const u32 w0[4], const u32 pw_len, const u32 salt_buf0[4], const u32 salt_len, const u32 a, const u32 b, const u32 c, const u32 d, u32 t[16]) { t[ 0] = 0; t[ 1] = 0; @@ -232,7 +218,7 @@ static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_ return sum20; } -static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, 
__global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m07700m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -296,7 +282,7 @@ static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -304,7 +290,7 @@ static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | 
w0[0]; t[ 1] = s0[1] | w0[1]; @@ -329,10 +315,10 @@ static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -407,7 +393,7 @@ static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -494,16 +480,16 @@ static void m07700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m07700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m07700s (u32 w0[4], u32 w1[4], 
u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -579,7 +565,7 @@ static void m07700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -587,7 +573,7 @@ static void m07700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x t[16]; + u32 t[16]; t[ 0] = s0[0] | w0[0]; t[ 1] = s0[1] | w0[1]; @@ -612,10 +598,10 @@ static void m07700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); @@ -690,7 +676,7 @@ static void m07700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); + const u32 sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); SETSHIFTEDINT (t, sum20, 0x80); @@ -777,12 +763,12 @@ static void m07700s 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p a ^= c; b ^= d; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -802,28 +788,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -855,28 +841,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -912,28 +898,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -965,28 +951,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07700_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 
pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m07800_a0.cl b/OpenCL/m07800_a0.cl similarity index 94% rename from amd/m07800_a0.cl rename to OpenCL/m07800_a0.cl index 65e373e..7d7decd 100644 --- a/amd/m07800_a0.cl +++ b/OpenCL/m07800_a0.cl @@ -8,33 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETSHIFTEDINT(a,n) amd_bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], (n)) @@ -58,7 +44,7 @@ __constant u32 theMagicArray[64] = 0, 0, 0, 0, 0, 0, 0, 0 }; -static void swap_buffer (u32x final[16]) +static void swap_buffer (u32 final[16]) { final[ 0] = swap_workaround (final[ 0]); final[ 1] = swap_workaround (final[ 1]); @@ -78,30 +64,30 @@ static void swap_buffer (u32x final[16]) final[15] = swap_workaround (final[15]); } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - 
u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -222,14 +208,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -261,28 +247,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -331,7 +317,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -350,7 +336,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; 
digest[1] = SHA1M_B; @@ -362,8 +348,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -457,12 +443,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -490,14 +476,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -541,28 +527,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -611,7 +597,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -630,7 +616,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -642,8 +628,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -737,12 +723,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07800_a1.cl b/OpenCL/m07800_a1.cl similarity index 94% rename from amd/m07800_a1.cl rename to OpenCL/m07800_a1.cl index 265500b..1a7786f 100644 --- a/amd/m07800_a1.cl +++ b/OpenCL/m07800_a1.cl @@ -8,31 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETSHIFTEDINT(a,n) amd_bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], (n)) @@ -56,7 +42,7 @@ __constant u32 theMagicArray[64] = 0, 
0, 0, 0, 0, 0, 0, 0 }; -static void swap_buffer (u32x final[16]) +static void swap_buffer (u32 final[16]) { final[ 0] = swap_workaround (final[ 0]); final[ 1] = swap_workaround (final[ 1]); @@ -76,30 +62,30 @@ static void swap_buffer (u32x final[16]) final[15] = swap_workaround (final[15]); } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -220,28 +206,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; 
wordl3[1] = 0; @@ -351,28 +337,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -383,7 +369,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -402,7 +388,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -414,8 +400,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -509,12 +495,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ 
-542,28 +528,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -681,28 +667,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -713,7 +699,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -732,7 +718,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -744,8 +730,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += 
((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -839,12 +825,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m07800_a3.cl b/OpenCL/m07800_a3.cl similarity index 89% rename from amd/m07800_a3.cl rename to OpenCL/m07800_a3.cl index 5bf3932..6c6ed28 100644 --- a/amd/m07800_a3.cl +++ b/OpenCL/m07800_a3.cl @@ -8,31 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define GETSHIFTEDINT(a,n) amd_bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], (n)) @@ -56,7 +42,7 @@ __constant u32 theMagicArray[64] = 0, 0, 0, 0, 0, 0, 0, 0 }; -static void swap_buffer (u32x final[16]) +static void swap_buffer (u32 final[16]) { final[ 0] = swap_workaround (final[ 0]); final[ 1] = swap_workaround (final[ 1]); @@ -76,30 +62,30 @@ static void swap_buffer (u32x final[16]) final[15] = swap_workaround (final[15]); } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], 
const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -204,7 +190,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void 
m07800m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -276,7 +262,7 @@ static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -288,7 +274,7 @@ static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -307,7 +293,7 @@ static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -319,8 +305,8 @@ static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -414,16 +400,16 @@ static void m07800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], 
const u32 p sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m07800s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const 
u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -507,7 +493,7 @@ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -519,7 +505,7 @@ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x final[256]; + u32 final[256]; final[ 0] = swap_workaround (w0[0] | s0[0]); final[ 1] = swap_workaround (w0[1] | s0[1]); @@ -538,7 +524,7 @@ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p final[14] = 0; final[15] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -550,8 +536,8 @@ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // prepare magic array range - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; + u32 lengthMagicArray = 0x20; + u32 offsetMagicArray = 0; lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; @@ -645,12 +631,12 @@ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -670,28 +656,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; 
w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -723,28 +709,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -780,28 +766,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -833,28 +819,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07800_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m07900.cl b/OpenCL/m07900.cl similarity index 95% rename from amd/m07900.cl rename to OpenCL/m07900.cl index a214683..23b176d 100644 --- a/amd/m07900.cl +++ b/OpenCL/m07900.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - 
#define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u64 k_sha512[80] = @@ -153,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07900_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -256,28 +244,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07900_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -381,12 +369,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m07900_comp (__gl * digest */ - const u32x r0 = l32_from_64 (tmps[gid].digest_buf[0]); - const u32x r1 = h32_from_64 (tmps[gid].digest_buf[0]); - const u32x r2 = l32_from_64 (tmps[gid].digest_buf[1]); - const u32x r3 = h32_from_64 (tmps[gid].digest_buf[1]); + const u32 r0 = l32_from_64 (tmps[gid].digest_buf[0]); + const u32 r1 = h32_from_64 (tmps[gid].digest_buf[0]); + const u32 r2 = l32_from_64 (tmps[gid].digest_buf[1]); + const u32 r3 = h32_from_64 
(tmps[gid].digest_buf[1]); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m08000_a0.cl b/OpenCL/m08000_a0.cl similarity index 89% rename from amd/m08000_a0.cl rename to OpenCL/m08000_a0.cl index ceddde5..2f10770 100644 --- a/amd/m08000_a0.cl +++ b/OpenCL/m08000_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -66,33 +42,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (u32x digest[8], const u32x w[16]) +static void sha256_transform (u32 digest[8], const u32 w[16]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = 
w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w[ 0]; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; #define ROUND_EXPAND() \ { \ @@ -168,14 +144,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -198,10 +174,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -222,16 +198,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = swap_workaround (w0_t[0]); w_t[ 1] = swap_workaround (w0_t[1]); @@ -267,7 +243,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo w_t[14] = w_t[14] >> 8; w_t[15] = w_t[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -315,12 +291,12 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -348,14 +324,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -390,10 +366,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -414,16 +390,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = swap_workaround (w0_t[0]); w_t[ 1] = swap_workaround (w0_t[1]); @@ -459,7 +435,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo w_t[14] = w_t[14] >> 8; w_t[15] = w_t[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -507,12 +483,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = 
digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08000_a1.cl b/OpenCL/m08000_a1.cl similarity index 90% rename from amd/m08000_a1.cl rename to OpenCL/m08000_a1.cl index dc210ef..e9154aa 100644 --- a/amd/m08000_a1.cl +++ b/OpenCL/m08000_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -64,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (u32x digest[8], const u32x w[16]) +static void sha256_transform (u32 digest[8], const u32 w[16]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = 
w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w[ 0]; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; #define ROUND_EXPAND() \ { \ @@ -166,28 +142,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -242,10 +218,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -264,16 +240,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = swap_workaround (w0_t[0]); w_t[ 1] = swap_workaround (w0_t[1]); @@ 
-309,7 +285,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo w_t[14] = w_t[14] >> 8; w_t[15] = w_t[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -357,12 +333,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -390,28 +366,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -478,10 +454,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -500,16 +476,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = swap_workaround 
(w0_t[0]); w_t[ 1] = swap_workaround (w0_t[1]); @@ -545,7 +521,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo w_t[14] = w_t[14] >> 8; w_t[15] = w_t[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -593,12 +569,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08000_a3.cl b/OpenCL/m08000_a3.cl similarity index 64% rename from amd/m08000_a3.cl rename to OpenCL/m08000_a3.cl index 488947a..49d2816 100644 --- a/amd/m08000_a3.cl +++ b/OpenCL/m08000_a3.cl @@ -4,46 +4,21 @@ */ #define _SHA256_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 k_sha256[64] = { @@ -65,33 +40,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, 
SHA256C3e, SHA256C3f, }; -static void sha256_transform (u32x digest[8], const u32x w[16]) +static void sha256_transform (u32 digest[8], const u32 w[16]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w[ 0]; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; #define ROUND_EXPAND() \ { \ @@ -151,7 +126,7 @@ static void sha256_transform (u32x digest[8], const u32x w[16]) digest[7] += h; } -static void m08000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -172,17 +147,15 @@ static void m08000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0 >> 8; w_t[ 1] = w[ 1] >> 8; @@ -201,7 +174,7 @@ static void m08000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w_t[14] = w[14] >> 8; w_t[15] = w[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -249,16 +222,16 @@ static void m08000m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x 
r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -291,17 +264,15 @@ static void m08000s (u32 
w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0 >> 8; w_t[ 1] = w[ 1] >> 8; @@ -320,7 +291,7 @@ static void m08000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w_t[14] = w[14] >> 8; w_t[15] = w[15] >> 8; - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -368,16 +339,16 @@ static void m08000s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha256_transform (digest, w_t); // 512 - 576 - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; + const u32 r0 = digest[3]; + const u32 r1 = digest[7]; + const u32 r2 = digest[2]; + const u32 r3 = digest[6]; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -415,7 +386,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m04 (__glo m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 
*bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -453,7 +424,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m08 (__glo m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m08000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -491,7 +462,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_m16 (__glo m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -529,7 +500,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s04 (__glo m08000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -567,7 +538,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s08 (__glo m08000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08000_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m08100_a0.cl b/OpenCL/m08100_a0.cl similarity index 95% rename from amd/m08100_a0.cl rename to OpenCL/m08100_a0.cl index 4fba8dd..4493d9d 100644 --- a/amd/m08100_a0.cl +++ b/OpenCL/m08100_a0.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S 
"check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -57,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -90,10 +71,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -120,10 +101,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -165,11 
+146,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -267,12 +248,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -300,14 +281,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -351,10 +332,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -381,10 +362,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo const u32 out_salt_len = out_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -426,11 +407,11 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -531,12 +512,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08100_a1.cl b/OpenCL/m08100_a1.cl similarity index 95% rename from amd/m08100_a1.cl rename to OpenCL/m08100_a1.cl index cfb1622..fa5f44b 100644 --- a/amd/m08100_a1.cl +++ b/OpenCL/m08100_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -55,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -138,10 +119,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -166,10 +147,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 
w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -211,11 +192,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -313,12 +294,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -346,28 +327,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -447,10 +428,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -475,10 +456,10 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m08100_s04 (__glo const u32 pw_salt_len = pw_len + salt_len; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -520,11 +501,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo //w3_t[2] = swap_workaround (w3_t[2]); //w3_t[3] = swap_workaround (w3_t[3]); - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -625,12 +606,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08100_a3.cl b/OpenCL/m08100_a3.cl similarity index 91% rename from amd/m08100_a3.cl rename to OpenCL/m08100_a3.cl index 9e74a0b..a26fda4 100644 --- a/amd/m08100_a3.cl +++ b/OpenCL/m08100_a3.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -65,7 +46,7 @@ static 
void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -77,10 +58,10 @@ static void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -103,11 +84,11 @@ static void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -205,16 +186,16 @@ static void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, 
__global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -258,7 +239,7 @@ static void m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -270,10 +251,10 @@ static void m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * prepend salt */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -296,11 +277,11 @@ static void m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -401,12 +382,12 @@ static void 
m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -420,28 +401,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -484,28 +465,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -556,28 +537,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; 
w3[1] = pws[gid].i[13]; @@ -643,28 +624,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -707,28 +688,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -779,28 +760,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08100_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m08200.cl b/OpenCL/m08200.cl similarity index 94% rename from amd/m08200.cl rename to OpenCL/m08200.cl index b368204..15ba027 100644 --- a/amd/m08200.cl +++ b/OpenCL/m08200.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define 
DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u32 k_sha256[64] = @@ -77,33 +65,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -163,7 +151,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 
opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -222,7 +210,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -463,28 +451,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08200_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -786,16 +774,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08200_comp (__gl w3[2] = 0; w3[3] = (64 + size) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); - const u32x r0 = digest[0]; - const u32x r1 = digest[1]; - const u32x r2 = digest[2]; - const u32x r3 = digest[3]; + const u32 r0 = digest[0]; + const u32 r1 = digest[1]; + const u32 r2 = digest[2]; + const u32 r3 = digest[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m08300_a0.cl b/OpenCL/m08300_a0.cl similarity index 91% rename from amd/m08300_a0.cl rename to OpenCL/m08300_a0.cl index bf7979e..d3ecfa8 100644 --- a/amd/m08300_a0.cl +++ b/OpenCL/m08300_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define 
VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -190,14 +166,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m08300_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -250,10 +226,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -274,28 +250,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -374,35 +350,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - 
u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -416,28 +392,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo for (u32 i = 0; i < salt_iter; i++) { - u32x w0_t3[4]; + u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; w1_t3[1] = swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -453,12 +429,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -486,14 +462,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -558,10 +534,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) 
{ - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -582,28 +558,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -682,35 +658,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -724,28 +700,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo for (u32 i = 0; i < salt_iter; i++) { - u32x w0_t3[4]; + 
u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; w1_t3[1] = swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -761,12 +737,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08300_a1.cl b/OpenCL/m08300_a1.cl similarity index 92% rename from amd/m08300_a1.cl rename to OpenCL/m08300_a1.cl index 0c400c8..1c026c9 100644 --- a/amd/m08300_a1.cl +++ b/OpenCL/m08300_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -188,28 +164,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -298,10 +274,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, 
wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -320,28 +296,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -420,35 +396,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -462,28 +438,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo for (u32 i = 0; i < 
salt_iter; i++) { - u32x w0_t3[4]; + u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; w1_t3[1] = swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -499,12 +475,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -532,28 +508,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -654,10 +630,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -676,28 
+652,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -776,35 +752,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -818,28 +794,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo for (u32 i = 0; i < salt_iter; i++) { - u32x w0_t3[4]; + u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; w1_t3[1] = 
swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -855,12 +831,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08300_a3.cl b/OpenCL/m08300_a3.cl similarity index 87% rename from amd/m08300_a3.cl rename to OpenCL/m08300_a3.cl index be35d7c..ed51d67 100644 --- a/amd/m08300_a3.cl +++ b/OpenCL/m08300_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const 
u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08300m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -287,7 +263,7 @@ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -295,28 +271,28 @@ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -331,35 +307,35 @@ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] 
| d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -373,28 +349,28 @@ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p for (u32 i = 0; i < salt_iter; i++) { - u32x w0_t3[4]; + u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; w1_t3[1] = swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -410,16 +386,16 @@ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include 
VECT_COMPARE_M + #include COMPARE_M } } -static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08300s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -546,7 +522,7 @@ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 
il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -554,28 +530,28 @@ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; @@ -590,35 +566,35 @@ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t2[4]; + u32 w0_t2[4]; w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - u32x w1_t2[4]; + u32 w1_t2[4]; w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - u32x w2_t2[4]; + u32 w2_t2[4]; w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - u32x w3_t2[4]; + u32 w3_t2[4]; w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); w3_t2[2] = 0; w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -632,28 +608,28 @@ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p for (u32 i = 0; i < salt_iter; i++) { - u32x w0_t3[4]; + u32 w0_t3[4]; w0_t3[0] = digest[0]; w0_t3[1] = digest[1]; w0_t3[2] = digest[2]; w0_t3[3] = digest[3]; - u32x w1_t3[4]; + u32 w1_t3[4]; w1_t3[0] = digest[4]; 
w1_t3[1] = swap_workaround (salt_buf0[0]); w1_t3[2] = swap_workaround (salt_buf0[1]); w1_t3[3] = swap_workaround (salt_buf0[2]); - u32x w2_t3[4]; + u32 w2_t3[4]; w2_t3[0] = swap_workaround (salt_buf0[3]); w2_t3[1] = swap_workaround (salt_buf1[0]); w2_t3[2] = swap_workaround (salt_buf1[1]); w2_t3[3] = swap_workaround (salt_buf1[2]); - u32x w3_t3[4]; + u32 w3_t3[4]; w3_t3[0] = swap_workaround (salt_buf1[3]); w3_t3[1] = 0; @@ -669,12 +645,12 @@ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); } - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -688,28 +664,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -735,28 +711,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -782,28 +758,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = 
pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -829,28 +805,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -876,28 +852,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -923,28 +899,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08300_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m08400_a0.cl b/OpenCL/m08400_a0.cl similarity index 93% rename from 
amd/m08400_a0.cl rename to OpenCL/m08400_a0.cl index 0eee243..eb869f0 100644 --- a/amd/m08400_a0.cl +++ b/OpenCL/m08400_a0.cl @@ -8,80 +8,56 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x 
w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -200,14 +176,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -284,28 +260,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -316,35 +292,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = 
swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = swap_workaround (w2[0]); w2_t[1] = swap_workaround (w2[1]); w2_t[2] = swap_workaround (w2[2]); w2_t[3] = swap_workaround (w2[3]); - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = swap_workaround (w3[0]); w3_t[1] = swap_workaround (w3[1]); w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -354,11 +330,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -480,12 +456,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -511,14 +487,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -607,28 +583,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 
0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -639,35 +615,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = swap_workaround (w2[0]); w2_t[1] = swap_workaround (w2[1]); w2_t[2] = swap_workaround (w2[2]); w2_t[3] = swap_workaround (w2[3]); - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = swap_workaround (w3[0]); w3_t[1] = swap_workaround (w3[1]); w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -677,11 +653,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -803,12 +779,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08400_a1.cl b/OpenCL/m08400_a1.cl similarity index 93% rename from amd/m08400_a1.cl rename to OpenCL/m08400_a1.cl index 88dd037..b2264b4 100644 --- a/amd/m08400_a1.cl +++ b/OpenCL/m08400_a1.cl @@ -8,78 +8,54 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define 
VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = 
digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -198,28 +174,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -342,63 +318,63 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround 
(w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = swap_workaround (w2[0]); w2_t[1] = swap_workaround (w2[1]); w2_t[2] = swap_workaround (w2[2]); w2_t[3] = swap_workaround (w2[3]); - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = swap_workaround (w3[0]); w3_t[1] = swap_workaround (w3[1]); w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -408,11 +384,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -534,12 +510,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -565,28 +541,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -721,63 +697,63 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; 
w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); w0_t[2] = swap_workaround (w0[2]); w0_t[3] = swap_workaround (w0[3]); - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = swap_workaround (w1[0]); w1_t[1] = swap_workaround (w1[1]); w1_t[2] = swap_workaround (w1[2]); w1_t[3] = swap_workaround (w1[3]); - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = swap_workaround (w2[0]); w2_t[1] = swap_workaround (w2[1]); w2_t[2] = swap_workaround (w2[2]); w2_t[3] = swap_workaround (w2[3]); - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = swap_workaround (w3[0]); w3_t[1] = swap_workaround (w3[1]); w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -787,11 +763,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -913,12 +889,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include 
COMPARE_S } } diff --git a/amd/m08400_a3.cl b/OpenCL/m08400_a3.cl similarity index 89% rename from amd/m08400_a3.cl rename to OpenCL/m08400_a3.cl index 7a97f84..61527ea 100644 --- a/amd/m08400_a3.cl +++ b/OpenCL/m08400_a3.cl @@ -8,78 +8,54 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8_le(i) l_bin2asc[(i)] #endif #ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_lower8_le(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = 
w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -184,7 +160,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m08400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -224,7 +200,7 @@ static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -232,35 +208,35 @@ static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -270,11 +246,11 @@ static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -396,16 +372,16 @@ static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 
= digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m08400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -457,7 +433,7 @@ static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -465,35 +441,35 @@ static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; w0_t[2] = w0[2]; w0_t[3] = w0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0]; w1_t[1] = w1[1]; w1_t[2] = w1[2]; w1_t[3] = w1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0]; w2_t[1] = w2[1]; w2_t[2] = w2[2]; w2_t[3] = w2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0]; w3_t[1] = w3[1]; w3_t[2] = 0; w3_t[3] = pw_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -503,11 +479,11 @@ static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; + u32 a; + u32 b; + u32 c; + u32 d; + u32 e; a = digest[0]; b = digest[1]; @@ -629,12 +605,12 @@ static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; + const u32 r0 = digest[3]; + const u32 r1 = digest[4]; + const u32 r2 = digest[2]; + const u32 r3 = digest[1]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -647,28 +623,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - 
u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -728,28 +704,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -809,28 +785,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_m16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -890,28 +866,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -971,28 +947,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = 
pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1052,28 +1028,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08400_s16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m08500_a0.cl b/OpenCL/m08500_a0.cl similarity index 95% rename from amd/m08500_a0.cl rename to OpenCL/m08500_a0.cl index 0d0ed4d..02deea4 100644 --- a/amd/m08500_a0.cl +++ b/OpenCL/m08500_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -388,18 +364,18 @@ __constant u32 c_skb[8][64] = #define NBOX(i,n,S) (S)[(n)][(i)] -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i++) { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); + u32 u = Kc[i] ^ r; + u32 t = Kd[i] ^ rotl32 (r, 28u); #ifdef VECT_SIZE1 l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) @@ -479,9 +455,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -509,8 +485,8 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s; - u32x t; + u32 s; + u32 t; #ifdef VECT_SIZE1 s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) @@ -643,7 +619,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) +static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { #ifdef VECT_SIZE1 @@ -738,7 +714,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf[4]; + u32 pw_buf[4]; pw_buf[0] = pws[gid].i[ 0]; pw_buf[1] = pws[gid].i[ 1]; @@ -791,28 +767,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf[0]; w0[1] = pw_buf[1]; w0[2] = pw_buf[2]; w0[3] = pw_buf[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -823,33 +799,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__glo out_len = (out_len >= 8) ? 8 : out_len; - u32x key[2]; + u32 key[2]; transform_racf_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -875,7 +851,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf[4]; + u32 pw_buf[4]; pw_buf[0] = pws[gid].i[ 0]; pw_buf[1] = pws[gid].i[ 1]; @@ -940,28 +916,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf[0]; w0[1] = pw_buf[1]; w0[2] = pw_buf[2]; w0[3] = pw_buf[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -972,33 +948,33 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo out_len 
= (out_len >= 8) ? 8 : out_len; - u32x key[2]; + u32 key[2]; transform_racf_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08500_a1.cl b/OpenCL/m08500_a1.cl similarity index 95% rename from amd/m08500_a1.cl rename to OpenCL/m08500_a1.cl index 9e2714f..3f58df8 100644 --- a/amd/m08500_a1.cl +++ b/OpenCL/m08500_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -386,18 +362,18 @@ __constant u32 c_skb[8][64] = #define NBOX(i,n,S) (S)[(n)][(i)] -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x 
Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i++) { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); + u32 u = Kc[i] ^ r; + u32 t = Kd[i] ^ rotl32 (r, 28u); #ifdef VECT_SIZE1 l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) @@ -477,9 +453,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -507,8 +483,8 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s; - u32x t; + u32 s; + u32 t; #ifdef VECT_SIZE1 s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) @@ -641,7 +617,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) +static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { #ifdef VECT_SIZE1 @@ -736,28 +712,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -855,61 +831,61 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m08500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x key[2]; + u32 key[2]; transform_racf_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -935,28 +911,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = 0; wordl0[3] = 0; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = 0; wordl1[1] = 0; wordl1[2] = 0; wordl1[3] = 0; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -1066,61 +1042,61 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = 0; w0[3] = 0; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; 
w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; - u32x key[2]; + u32 key[2]; transform_racf_key (w0[0], w0[1], key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08500_a3.cl b/OpenCL/m08500_a3.cl similarity index 80% rename from amd/m08500_a3.cl rename to OpenCL/m08500_a3.cl index d84a67a..e9bb6dc 100644 --- a/amd/m08500_a3.cl +++ b/OpenCL/m08500_a3.cl @@ -4,46 +4,21 @@ */ #define _DES_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -387,18 +362,18 @@ __constant u32 c_skb[8][64] 
= #define NBOX(i,n,S) (S)[(n)][(i)] -static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void _des_crypt_encrypt (u32 iv[2], u32 data[2], u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; - u32x r = data[0]; - u32x l = data[1]; + u32 r = data[0]; + u32 l = data[1]; #pragma unroll 16 for (u32 i = 0; i < 16; i++) { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); + u32 u = Kc[i] ^ r; + u32 t = Kd[i] ^ rotl32 (r, 28u); #ifdef VECT_SIZE1 l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) @@ -478,9 +453,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[1 iv[1] = r; } -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -508,8 +483,8 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - u32x s; - u32x t; + u32 s; + u32 t; #ifdef VECT_SIZE1 s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) @@ -642,7 +617,7 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) +static void transform_racf_key (const u32 w0, const u32 w1, u32 key[2]) { #ifdef VECT_SIZE1 @@ -723,7 +698,7 @@ static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) #endif } -static void m08500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, 
__global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -745,49 +720,47 @@ static void m08500m (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - u32x w1 = w[1]; + u32 w1 = w[1]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x key[2]; + u32 key[2]; transform_racf_key 
(w0, w1, key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -821,49 +794,47 @@ static void m08500s (__local u32 s_SPtrans[8][64], __local u32 s_skb[8][64], u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - u32x w1 = w[1]; + u32 w1 = w[1]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x key[2]; + u32 key[2]; transform_racf_key (w0, w1, key); - const u32x c = key[0]; - const u32x d = key[1]; + const u32 c = key[0]; + const u32 d = key[1]; - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - u32x data[2]; + u32 data[2]; data[0] = salt_buf0[0]; data[1] = salt_buf0[1]; - u32x iv[2]; + u32 iv[2]; _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = iv[0]; + const u32 r1 = iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, 
__global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 s_SPtrans[8][64]; @@ -930,15 +901,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m04 (__glo m08500m (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m16 (__global pw_t 
*pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, 
__global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { __local u32 s_SPtrans[8][64]; @@ -1005,10 +976,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s04 (__glo m08500s (s_SPtrans, s_skb, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const 
u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { } diff --git a/amd/m08600_a0.cl b/OpenCL/m08600_a0.cl similarity index 88% 
rename from amd/m08600_a0.cl rename to OpenCL/m08600_a0.cl index dfc6402..959fffc 100644 --- a/amd/m08600_a0.cl +++ b/OpenCL/m08600_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -83,20 +59,20 @@ __constant u32 lotus_magic_table[256] = }; #ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) +#define BOX(S,i) u32 ((S)[(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -105,8 +81,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll 12 for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 
tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -118,11 +94,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; #pragma unroll 4 for (int i = 0; i < 4; i++) @@ -134,7 +110,7 @@ static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_m } } -static void pad (u32x w[4], const u32 len) +static void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -213,9 +189,9 @@ static void pad (u32x w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -238,23 +214,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 
0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -297,14 +273,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -319,28 +295,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -349,7 +325,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -368,7 +344,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo w[14] = 0; w[15] = 0; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -398,12 +374,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -446,14 +422,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = 
pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -480,28 +456,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -510,7 +486,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -529,7 +505,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo w[14] = 0; w[15] = 0; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -559,12 +535,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08600_a1.cl b/OpenCL/m08600_a1.cl similarity index 90% rename from amd/m08600_a1.cl rename to OpenCL/m08600_a1.cl index de9644e..da89b02 100644 --- a/amd/m08600_a1.cl +++ b/OpenCL/m08600_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 
#define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -81,20 +57,20 @@ __constant u32 lotus_magic_table[256] = }; #ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) +#define BOX(S,i) u32 ((S)[(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -103,8 +79,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll 12 for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -116,11 +92,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void 
lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; #pragma unroll 4 for (int i = 0; i < 4; i++) @@ -132,7 +108,7 @@ static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_m } } -static void pad (u32x w[4], const u32 len) +static void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -211,9 +187,9 @@ static void pad (u32x w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -236,23 +212,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -295,28 +271,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 
4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -373,7 +349,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -392,7 +368,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo w[14] = wordl3[2] | wordr3[2]; w[15] = wordl3[3] | wordr3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -422,12 +398,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -470,28 +446,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -560,7 +536,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | 
wordr0[1]; @@ -579,7 +555,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo w[14] = wordl3[2] | wordr3[2]; w[15] = wordl3[3] | wordr3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -609,12 +585,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08600_a3.cl b/OpenCL/m08600_a3.cl similarity index 66% rename from amd/m08600_a3.cl rename to OpenCL/m08600_a3.cl index e4caa74..dfc2d36 100644 --- a/amd/m08600_a3.cl +++ b/OpenCL/m08600_a3.cl @@ -4,46 +4,21 @@ */ #define _LOTUS5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -82,20 +57,20 @@ __constant u32 lotus_magic_table[256] = }; #ifdef VECT_SIZE1 -#define BOX(S,i) u32x 
((S)[(i)]) +#define BOX(S,i) u32 ((S)[(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -104,8 +79,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll 12 for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -117,11 +92,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; #pragma unroll 4 for (int i = 0; i < 4; i++) @@ -212,9 +187,9 @@ static void pad (u32 w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -237,23 +212,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 
checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = saved_key[0]; block[1] = saved_key[1]; @@ -265,7 +240,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform_norecalc (state, checksum, s_lotus_magic_table); } -static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, 
__global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -299,17 +274,15 @@ static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_tmp[16]; + u32 w_tmp[16]; w_tmp[ 0] = w0; w_tmp[ 1] = w[ 1]; @@ -328,7 +301,7 @@ static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 w_tmp[14] = w[14]; w_tmp[15] = w[15]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -337,16 +310,16 @@ static void m08600m (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08600s (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08600s (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -392,17 +365,15 @@ static void m08600s (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | 
w0r; + const u32 w0 = w0l | w0r; - u32x w_tmp[16]; + u32 w_tmp[16]; w_tmp[ 0] = w0; w_tmp[ 1] = w[ 1]; @@ -421,7 +392,7 @@ static void m08600s (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 w_tmp[14] = w[14]; w_tmp[15] = w[15]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -430,16 +401,16 @@ static void m08600s (__local u32 s_lotus_magic_table[256], u32 w[16], const u32 domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; + const u32 r0 = state[0]; + const u32 r1 = state[1]; + const u32 r2 = state[2]; + const u32 r3 = state[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -493,7 +464,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m04 (__glo m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -547,7 +518,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m08 (__glo m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -601,7 +572,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_m16 (__glo m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -655,7 +626,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s04 (__glo m08600s (s_lotus_magic_table, w, pw_len, pws, 
rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -709,7 +680,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s08 (__glo m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08600_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m08700_a0.cl b/OpenCL/m08700_a0.cl similarity index 86% rename from amd/m08700_a0.cl rename to OpenCL/m08700_a0.cl index 7c3e935..753b9a2 100644 --- a/amd/m08700_a0.cl +++ b/OpenCL/m08700_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -106,9 +82,9 @@ __constant u32 lotus_magic_table[256] = #define 
uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -117,8 +93,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -130,11 +106,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; //#pragma unroll // kernel fails if used for (int i = 0; i < 4; i++) @@ -146,7 +122,7 @@ static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_m } } -static void pad (u32x w[4], const u32 len) +static void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -225,9 +201,9 @@ static void pad (u32x w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -250,23 +226,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 
checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = 0; block[1] = 0; @@ -358,14 +334,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -387,28 +363,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -417,7 +393,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -436,7 +412,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo w[14] = w3[2]; w[15] = w3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -466,21 +442,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 
(__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -509,17 +485,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo domino_big_md (w, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 0xffffffff; + u32 c = state[2] & 0x000000ff; + 
u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -593,14 +569,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -634,28 +610,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -664,7 +640,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w[16]; + u32 w[16]; w[ 0] = w0[0]; w[ 1] = w0[1]; @@ -683,7 +659,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo w[14] = w3[2]; w[15] = w3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -713,21 +689,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 
((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -756,17 +732,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo domino_big_md (w, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 0xffffffff; + u32 c = state[2] & 0x000000ff; + u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08700_a1.cl b/OpenCL/m08700_a1.cl similarity index 88% rename from amd/m08700_a1.cl rename to 
OpenCL/m08700_a1.cl index 67760b1..7515e91 100644 --- a/amd/m08700_a1.cl +++ b/OpenCL/m08700_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -104,9 +80,9 @@ __constant u32 lotus_magic_table[256] = #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -115,8 +91,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -128,11 +104,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x 
*out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; //#pragma unroll // kernel fails if used for (int i = 0; i < 4; i++) @@ -144,7 +120,7 @@ static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_m } } -static void pad (u32x w[4], const u32 len) +static void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -223,9 +199,9 @@ static void pad (u32x w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -248,23 +224,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = 0; block[1] = 0; @@ -356,28 +332,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - 
u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -442,7 +418,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -461,7 +437,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo w[14] = wordl3[2] | wordr3[2]; w[15] = wordl3[3] | wordr3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -491,21 +467,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 
((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -534,17 +510,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo domino_big_md (w, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 0xffffffff; + u32 c = state[2] & 0x000000ff; + u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -618,28 +594,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -716,7 +692,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -735,7 +711,7 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo w[14] = wordl3[2] | wordr3[2]; w[15] = wordl3[3] | wordr3[3]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -765,21 +741,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo domino_big_md (w, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -808,17 +784,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m08700_s04 (__glo domino_big_md (w, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 0xffffffff; + u32 c = state[2] & 0x000000ff; + u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m08700_a3.cl b/OpenCL/m08700_a3.cl similarity index 74% rename from amd/m08700_a3.cl rename to OpenCL/m08700_a3.cl index e2b66d6..84609cc 100644 --- a/amd/m08700_a3.cl +++ b/OpenCL/m08700_a3.cl @@ -4,46 +4,21 @@ */ #define _LOTUS6_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 lotus_magic_table[256] = { @@ -105,9 +80,9 @@ __constant u32 lotus_magic_table[256] = #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void lotus_mix (u32x *in, __local 
u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -116,8 +91,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -129,11 +104,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; //#pragma unroll // kernel fails if used for (int i = 0; i < 4; i++) @@ -224,9 +199,9 @@ static void pad (u32 w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -249,23 +224,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const 
u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = 0; block[1] = 0; @@ -295,7 +270,7 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform_norecalc (state, checksum, s_lotus_magic_table); } -static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, 
__global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -336,17 +311,15 @@ static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_tmp[16]; + u32 w_tmp[16]; w_tmp[ 0] = w0; w_tmp[ 1] = w[ 1]; @@ -365,7 +338,7 @@ static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc w_tmp[14] = w[14]; w_tmp[15] = w[15]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -374,21 +347,21 @@ static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = 
uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -417,21 +390,21 @@ static void m08700m (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 0xffffffff; + u32 c = state[2] & 0x000000ff; + u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m08700s (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global 
void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m08700s (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc[256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -484,17 +457,15 @@ static void m08700s (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_tmp[16]; + u32 w_tmp[16]; w_tmp[ 0] = w0; w_tmp[ 1] = w[ 1]; @@ -513,7 +484,7 @@ static void m08700s (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc w_tmp[14] = w[14]; w_tmp[15] = w[15]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -522,21 +493,21 @@ static void m08700s (__local u32 
s_lotus_magic_table[256], __local u32 l_bin2asc domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 + //const u32 w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -565,21 +536,21 @@ static void m08700s (__local u32 s_lotus_magic_table[256], __local u32 l_bin2asc domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; + u32 a = state[0] & 0xffffffff; + u32 b = state[1] & 
0xffffffff; + u32 c = state[2] & 0x000000ff; + u32 d = state[3] & 0x00000000; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = c; + const u32 r3 = d; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -664,7 +635,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m04 (__glo m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, 
__global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -749,7 +720,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m08 (__glo m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -834,7 +805,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_m16 (__glo m08700m (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, 
__global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -919,7 +890,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s04 (__glo m08700s (s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, 
digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -1004,7 +975,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s08 (__glo m08700s 
(s_lotus_magic_table, l_bin2asc, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08700_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m08800.cl b/OpenCL/m08800.cl similarity index 97% rename from amd/m08800.cl rename to OpenCL/m08800.cl index 577f8fb..a00588e 100644 --- a/amd/m08800.cl +++ b/OpenCL/m08800.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" __constant u32 te0[256] = { @@ -1152,33 +1140,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = 
w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -1238,30 +1226,30 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1366,7 +1354,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -1419,7 +1407,7 @@ 
static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -1465,28 +1453,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1527,8 +1515,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -1587,7 +1575,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[5]; + u32 dgst[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -1611,8 +1599,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -1628,8 +1616,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_loop (__gl for (u32 i = 0; i < 8; i += 5) { - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[i + 0]; dgst[1] = tmps[gid].dgst[i + 1]; @@ 
-1645,10 +1633,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -1782,7 +1770,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl * aes init */ - u32x ukeyx[8]; + u32 ukeyx[8]; ukeyx[0] = tmps[gid].out[0]; ukeyx[1] = tmps[gid].out[1]; @@ -1793,10 +1781,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl ukeyx[6] = 0; ukeyx[7] = 0; - u32x a; - u32x b; - u32x c; - u32x d; + u32 a; + u32 b; + u32 c; + u32 d; /** * aes decrypt key @@ -1831,7 +1819,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl // 1. start with simple sha256_transform - u32x essivhash[8]; + u32 essivhash[8]; essivhash[0] = SHA256M_A; essivhash[1] = SHA256M_B; @@ -1842,10 +1830,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl essivhash[6] = SHA256M_G; essivhash[7] = SHA256M_H; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = a; w0[1] = b; @@ -1920,10 +1908,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - u32x r0 = out[0] ^ iv[0]; - u32x r1 = out[1] ^ iv[1]; - u32x r2 = out[2] ^ iv[2]; - u32x r3 = out[3] ^ iv[3]; + u32 r0 = out[0] ^ iv[0]; + u32 r1 = out[1] ^ iv[1]; + u32 r2 = out[2] ^ iv[2]; + u32 r3 = out[3] ^ iv[3]; // rotate 3 byte (static in fat!) 
@@ -1933,7 +1921,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl // MSDOS5.0 if ((r0 == 0x4f44534d) && (r1 == 0x302e3553)) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); + mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0); d_return_buf[lid] = 1; } @@ -2001,7 +1989,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08800_comp (__gl if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53)) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); + mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m08900.cl b/OpenCL/m08900.cl similarity index 94% rename from amd/m08900.cl rename to OpenCL/m08900.cl index 341578c..25bfeac 100644 --- a/amd/m08900.cl +++ b/OpenCL/m08900.cl @@ -8,37 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 k_sha256[64] = @@ -61,33 +49,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 
digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -147,7 +135,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -206,7 +194,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -248,7 +236,7 @@ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, digest); } 
-static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) +static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2]) { switch (block_len) { @@ -792,28 +780,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08900_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -921,7 +909,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08900_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); @@ -983,28 +971,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08900_comp (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1103,16 +1091,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m08900_comp (__gl w3[2] = 0; w3[3] = (64 + (scrypt_cnt * 4) + 4) * 8; - u32x digest[8]; + u32 digest[8]; hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); - const u32x r0 = swap_workaround (digest[DGST_R0]); - const u32x r1 = swap_workaround 
(digest[DGST_R1]); - const u32x r2 = swap_workaround (digest[DGST_R2]); - const u32x r3 = swap_workaround (digest[DGST_R3]); + const u32 r0 = swap_workaround (digest[DGST_R0]); + const u32 r1 = swap_workaround (digest[DGST_R1]); + const u32 r2 = swap_workaround (digest[DGST_R2]); + const u32 r3 = swap_workaround (digest[DGST_R3]); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m09000.cl b/OpenCL/m09000.cl similarity index 94% rename from amd/m09000.cl rename to OpenCL/m09000.cl index 81259e6..d938f24 100644 --- a/amd/m09000.cl +++ b/OpenCL/m09000.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif // http://www.schneier.com/code/constants.txt @@ -320,7 +308,7 @@ __constant u32 c_pbox[18] = { \ uchar4 c = as_uchar4 ((L)); \ \ - u32x tmp; \ + u32 tmp; \ \ tmp = S0[c.s3]; \ tmp += S1[c.s2]; \ @@ -351,7 +339,7 @@ __constant u32 c_pbox[18] = BF_ROUND (L, R, 15); \ BF_ROUND (R, L, 16); \ \ - u32x tmp; \ + u32 tmp; \ \ tmp = R; \ R = L; \ @@ -360,30 +348,30 @@ __constant u32 c_pbox[18] = L ^= P[17]; \ } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x 
w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -499,28 +487,28 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_init (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -581,7 +569,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_init (__glo w3[2] = 0; w3[3] = block_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -602,15 +590,15 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_init (__glo P[i] = c_pbox[i]; } - __local u32x S0_all[8][256]; - __local u32x S1_all[8][256]; - __local u32x S2_all[8][256]; - __local u32x S3_all[8][256]; + __local u32 S0_all[8][256]; + __local u32 S1_all[8][256]; + __local u32 S2_all[8][256]; + __local u32 S3_all[8][256]; - __local u32x *S0 = S0_all[lid]; - __local u32x *S1 = S1_all[lid]; - __local u32x *S2 = S2_all[lid]; - __local u32x *S3 = S3_all[lid]; + __local u32 *S0 = 
S0_all[lid]; + __local u32 *S1 = S1_all[lid]; + __local u32 *S2 = S2_all[lid]; + __local u32 *S3 = S3_all[lid]; for (u32 i = 0; i < 256; i++) { @@ -726,22 +714,22 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__glo digest[0] = tmps[gid].digest[0]; digest[1] = tmps[gid].digest[1]; - u32x P[18]; + u32 P[18]; for (u32 i = 0; i < 18; i++) { P[i] = tmps[gid].P[i]; } - __local u32x S0_all[8][256]; - __local u32x S1_all[8][256]; - __local u32x S2_all[8][256]; - __local u32x S3_all[8][256]; + __local u32 S0_all[8][256]; + __local u32 S1_all[8][256]; + __local u32 S2_all[8][256]; + __local u32 S3_all[8][256]; - __local u32x *S0 = S0_all[lid]; - __local u32x *S1 = S1_all[lid]; - __local u32x *S2 = S2_all[lid]; - __local u32x *S3 = S3_all[lid]; + __local u32 *S0 = S0_all[lid]; + __local u32 *S1 = S1_all[lid]; + __local u32 *S2 = S2_all[lid]; + __local u32 *S3 = S3_all[lid]; for (u32 i = 0; i < 256; i++) { @@ -753,8 +741,8 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_loop (__glo // loop - u32x L0 = digest[0]; - u32x R0 = digest[1]; + u32 L0 = digest[0]; + u32 R0 = digest[1]; for (u32 i = 0; i < loop_cnt; i++) { @@ -788,10 +776,10 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_comp (__glo // final sha1 - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = swap_workaround (digest[0]); w0[1] = swap_workaround (digest[1]); @@ -810,7 +798,7 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_comp (__glo w3[2] = 0; w3[3] = (8 + 2) * 8; - u32x out[5]; + u32 out[5]; out[0] = 0; // yep, not a bug! 
context is zero here out[1] = 0; @@ -820,12 +808,12 @@ __kernel void __attribute__((reqd_work_group_size (8, 1, 1))) m09000_comp (__glo sha1_transform (w0, w1, w2, w3, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m09100.cl b/OpenCL/m09100.cl similarity index 90% rename from amd/m09100.cl rename to OpenCL/m09100.cl index f24ba90..038b438 100644 --- a/amd/m09100.cl +++ b/OpenCL/m09100.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -29,19 +17,19 @@ #undef _SHA1_ -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant char lotus64_table[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; @@ -83,32 +71,32 @@ __constant u32 lotus_magic_table[256] = }; #ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) +#define BOX(S,i) u32 ((S)[(i)]) #endif #ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1]) #endif #ifdef VECT_SIZE4 -#define BOX(S,i) u32x ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) +#define BOX(S,i) u32 ((S)[(i).s0], (S)[(i).s1], (S)[(i).s2], (S)[(i).s3]) #endif #ifdef VECT_SIZE1 -#define 
uint_to_hex_upper8(i) u32x (l_bin2asc[(i)]) +#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i)]) #endif #ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) #endif #ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#define uint_to_hex_upper8(i) u32 (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) +static void lotus_mix (u32 *in, __local u32 s_lotus_magic_table[256]) { - u32x p = 0; + u32 p = 0; for (int i = 0; i < 18; i++) { @@ -117,8 +105,8 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) #pragma unroll 12 for (int j = 0; j < 12; j++) { - u32x tmp_in = in[j]; - u32x tmp_out = 0; + u32 tmp_in = in[j]; + u32 tmp_out = 0; p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; @@ -130,11 +118,11 @@ static void lotus_mix (u32x *in, __local u32 s_lotus_magic_table[256]) } } -static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_magic_table[256]) +static void lotus_transform_password (u32 *in, u32 *out, __local u32 s_lotus_magic_table[256]) { - u32x t = out[3] >> 24; + u32 t = out[3] >> 24; - u32x c; + u32 c; #pragma unroll 4 for (int i = 0; i < 4; i++) @@ -146,7 +134,7 @@ static void lotus_transform_password (u32x *in, u32x *out, __local u32 s_lotus_m } } -static void pad (u32x w[4], const u32 len) +static void pad (u32 w[4], const u32 len) { const u32 val = 16 - len; @@ -225,9 +213,9 @@ static void pad (u32x w[4], const u32 len) } } -static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform_norecalc (u32 state[4], 
u32 block[4], __local u32 s_lotus_magic_table[256]) { - u32x x[12]; + u32 x[12]; x[ 0] = state[0]; x[ 1] = state[1]; @@ -250,23 +238,23 @@ static void mdtransform_norecalc (u32x state[4], u32x block[4], __local u32 s_lo state[3] = x[3]; } -static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], __local u32 s_lotus_magic_table[256]) +static void mdtransform (u32 state[4], u32 checksum[4], u32 block[4], __local u32 s_lotus_magic_table[256]) { mdtransform_norecalc (state, block, s_lotus_magic_table); lotus_transform_password (block, checksum, s_lotus_magic_table); } -static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], __local u32 s_lotus_magic_table[256]) +static void domino_big_md (const u32 saved_key[16], const u32 size, u32 state[4], __local u32 s_lotus_magic_table[256]) { - u32x checksum[4]; + u32 checksum[4]; checksum[0] = 0; checksum[1] = 0; checksum[2] = 0; checksum[3] = 0; - u32x block[4]; + u32 block[4]; block[0] = 0; block[1] = 0; @@ -298,30 +286,30 @@ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[ mdtransform_norecalc (state, checksum, s_lotus_magic_table); } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 
w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -426,7 +414,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -479,7 +467,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -539,7 +527,7 @@ static void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_plai } } -static void lotus6_base64_encode (u8x base64_hash[24], const u32 salt0, const u32 salt1, u32x a, u32x b, u32x c) +static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 salt1, u32 a, u32 b, u32 c) { uchar4 salt0c = as_uchar4 (salt0); uchar4 salt1c = as_uchar4 (salt1); @@ -771,7 +759,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl if (gid >= gid_max) return; - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -857,7 +845,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl * Lotus 6 hash - SEC_pwddigest_V2 */ - u32x w_tmp[16]; + u32 w_tmp[16]; w_tmp[ 0] = w[ 0]; w_tmp[ 1] = w[ 1]; @@ -876,7 +864,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m09100_init (__gl w_tmp[14] = w[14]; w_tmp[15] = w[15]; - u32x state[4]; + u32 state[4]; state[0] = 0; state[1] = 0; @@ -885,19 +873,19 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 + const u32 w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 + const u32 w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 + const u32 w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 + const u32 w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 + const u32 w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 + const u32 w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 + const u32 w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; const u32 pade = 0x0e0e0e0e; @@ -926,9 +914,9 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - u32x a = state[0]; - u32x b = state[1]; - u32x c = state[2]; + u32 a = state[0]; + u32 b = state[1]; + u32 c = state[2]; /** * Base64 encode @@ -936,7 +924,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 
1, 1))) m09100_init (__gl pw_len = 22; - u8x base64_hash[22]; + u8 base64_hash[22]; lotus6_base64_encode (base64_hash, salt_buf0[0], salt_buf0[1], a, b, c); @@ -946,14 +934,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl */ #ifdef VECT_SIZE1 - u32x w0[4]; + u32 w0[4]; w0[0] = (base64_hash[ 0] << 24) | (base64_hash[ 1] << 16) | (base64_hash[ 2] << 8) | base64_hash[ 3]; w0[1] = (base64_hash[ 4] << 24) | (base64_hash[ 5] << 16) | (base64_hash[ 6] << 8) | base64_hash[ 7]; w0[2] = (base64_hash[ 8] << 24) | (base64_hash[ 9] << 16) | (base64_hash[10] << 8) | base64_hash[11]; w0[3] = (base64_hash[12] << 24) | (base64_hash[13] << 16) | (base64_hash[14] << 8) | base64_hash[15]; - u32x w1[4]; + u32 w1[4]; w1[0] = (base64_hash[16] << 24) | (base64_hash[17] << 16) | (base64_hash[18] << 8) | base64_hash[19]; w1[1] = (base64_hash[20] << 24) | (base64_hash[21] << 16); @@ -962,7 +950,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl #endif #ifdef VECT_SIZE2 - u32x w0[4]; + u32 w0[4]; w0[0].s0 = (base64_hash[ 0].s0 << 24) | (base64_hash[ 1].s0 << 16) | (base64_hash[ 2].s0 << 8) | base64_hash[ 3].s0; w0[1].s0 = (base64_hash[ 4].s0 << 24) | (base64_hash[ 5].s0 << 16) | (base64_hash[ 6].s0 << 8) | base64_hash[ 7].s0; @@ -974,7 +962,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl w0[2].s1 = (base64_hash[ 8].s1 << 24) | (base64_hash[ 9].s1 << 16) | (base64_hash[10].s1 << 8) | base64_hash[11].s1; w0[3].s1 = (base64_hash[12].s1 << 24) | (base64_hash[13].s1 << 16) | (base64_hash[14].s1 << 8) | base64_hash[15].s1; - u32x w1[4]; + u32 w1[4]; w1[0].s0 = (base64_hash[16].s0 << 24) | (base64_hash[17].s0 << 16) | (base64_hash[18].s0 << 8) | base64_hash[19].s0; w1[1].s0 = (base64_hash[20].s0 << 24) | (base64_hash[21].s0 << 16); @@ -987,14 +975,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl w1[3].s1 = 0; #endif - u32x w2[4]; + u32 w2[4]; w2[0] = 0; 
w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1005,8 +993,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl * pads */ - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -1056,7 +1044,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = (64 + salt_len + 4) * 8; - u32x dgst[5]; + u32 dgst[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); @@ -1079,8 +1067,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -1094,8 +1082,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_loop (__gl opad[3] = tmps[gid].opad[3]; opad[4] = tmps[gid].opad[4]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -1111,10 +1099,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst[0]; w0[1] = dgst[1]; @@ -1171,12 +1159,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09100_comp (__gl * digest */ - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m09400.cl b/OpenCL/m09400.cl similarity index 98% rename from amd/m09400.cl rename to OpenCL/m09400.cl index a3314dc..731da4c 100644 --- a/amd/m09400.cl +++ b/OpenCL/m09400.cl @@ -8,37 
+8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 te0[256] = @@ -1212,30 +1200,30 @@ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 ^ rek[59]; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 
we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1350,28 +1338,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1402,35 +1390,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_init (__gl * init */ - u32x t0[4]; + u32 t0[4]; t0[0] = salt_buf[0]; t0[1] = salt_buf[1]; t0[2] = salt_buf[2]; t0[3] = salt_buf[3]; - u32x t1[4]; + u32 t1[4]; t1[0] = swap_workaround (w0[0]); t1[1] = swap_workaround (w0[1]); t1[2] = swap_workaround (w0[2]); t1[3] = swap_workaround (w0[3]); - u32x t2[4]; + u32 t2[4]; t2[0] = swap_workaround (w1[0]); t2[1] = swap_workaround (w1[1]); t2[2] = swap_workaround (w1[2]); t2[3] = swap_workaround (w1[3]); - u32x t3[4]; + u32 t3[4]; t3[0] = swap_workaround (w2[0]); t3[1] = swap_workaround (w2[1]); t3[2] = 0; t3[3] = (salt_len + (pw_len * 2)) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -1453,28 +1441,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = 0; w0[1] = tmps[gid].out[0]; w0[2] = tmps[gid].out[1]; w0[3] = tmps[gid].out[2]; - u32x w1[4]; + u32 w1[4]; w1[0] = tmps[gid].out[3]; w1[1] = tmps[gid].out[4]; w1[2] = 0x80000000; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1485,7 +1473,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_loop (__gl { w0[0] = 
swap_workaround (j); - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -1586,28 +1574,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = tmps[gid].out[0]; w0[1] = tmps[gid].out[1]; w0[2] = tmps[gid].out[2]; w0[3] = tmps[gid].out[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = tmps[gid].out[4]; w1[1] = 0; w1[2] = 0x80000000; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1682,8 +1670,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__gl // now we got the AES key, decrypt the verifier - u32x rek[60]; - u32x rdk[60]; + u32 rek[60]; + u32 rdk[60]; u32 verifier[4]; @@ -1704,7 +1692,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__gl data[6] = 0; data[7] = 0; - u32x ukeyx[8]; + u32 ukeyx[8]; ukeyx[0] = digest[0]; ukeyx[1] = digest[1]; @@ -1758,14 +1746,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__gl AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); { - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } /* @@ -1879,13 +1867,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09400_comp (__gl AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); { - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } } diff --git a/amd/m09500.cl b/OpenCL/m09500.cl similarity index 97% rename from 
amd/m09500.cl rename to OpenCL/m09500.cl index b974298..dfb6fd8 100644 --- a/amd/m09500.cl +++ b/OpenCL/m09500.cl @@ -8,37 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 te0[256] = @@ -942,30 +930,30 @@ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 ^ rdk[43]; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 
w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1080,28 +1068,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1132,35 +1120,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_init (__gl * init */ - u32x t0[4]; + u32 t0[4]; t0[0] = salt_buf[0]; t0[1] = salt_buf[1]; t0[2] = salt_buf[2]; t0[3] = salt_buf[3]; - u32x t1[4]; + u32 t1[4]; t1[0] = swap_workaround (w0[0]); t1[1] = swap_workaround (w0[1]); t1[2] = swap_workaround (w0[2]); t1[3] = swap_workaround (w0[3]); - u32x t2[4]; + u32 t2[4]; t2[0] = swap_workaround (w1[0]); t2[1] = swap_workaround (w1[1]); t2[2] = swap_workaround (w1[2]); t2[3] = swap_workaround (w1[3]); - u32x t3[4]; + u32 t3[4]; t3[0] = swap_workaround (w2[0]); t3[1] = swap_workaround (w2[1]); t3[2] = 0; t3[3] = (salt_len + (pw_len * 2)) * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -1183,28 +1171,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_loop (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = 0; w0[1] = tmps[gid].out[0]; w0[2] = tmps[gid].out[1]; w0[3] = tmps[gid].out[2]; - u32x w1[4]; + u32 w1[4]; w1[0] = tmps[gid].out[3]; w1[1] = tmps[gid].out[4]; w1[2] = 0x80000000; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 
0; @@ -1215,7 +1203,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_loop (__gl { w0[0] = swap_workaround (j); - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -1316,13 +1304,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__gl if (gid >= gid_max) return; - u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 }; - u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; + u32 encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 }; + u32 encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = tmps[gid].out[0]; w0[1] = tmps[gid].out[1]; @@ -1380,8 +1368,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__gl // now we got the AES key, decrypt the verifier - u32x rek[60]; - u32x rdk[60]; + u32 rek[60]; + u32 rdk[60]; u32 data[4]; @@ -1390,7 +1378,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__gl data[2] = office2010_bufs[salt_pos].encryptedVerifier[2]; data[3] = office2010_bufs[salt_pos].encryptedVerifier[3]; - u32x ukeyx[4]; + u32 ukeyx[4]; ukeyx[0] = digest0[0]; ukeyx[1] = digest0[1]; @@ -1431,7 +1419,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__gl w3[2] = 0; w3[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -1457,12 +1445,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09500_comp (__gl AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git 
a/amd/m09600.cl b/OpenCL/m09600.cl similarity index 98% rename from amd/m09600.cl rename to OpenCL/m09600.cl index 5d0eb10..312f052 100644 --- a/amd/m09600.cl +++ b/OpenCL/m09600.cl @@ -8,37 +8,25 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 te0[256] = @@ -1118,28 +1106,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1170,35 +1158,35 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_init (__gl * init */ - u64x t0[4]; + u64 t0[4]; t0[0] = (u64) salt_buf[0] << 32 | salt_buf[1]; t0[1] = (u64) salt_buf[2] << 32 | salt_buf[3]; t0[2] = (u64) swap_workaround (w0[0]) << 32 | swap_workaround (w0[1]); t0[3] = (u64) swap_workaround (w0[2]) << 32 | swap_workaround (w0[3]); - u64x t1[4]; + u64 t1[4]; t1[0] = (u64) 
swap_workaround (w1[0]) << 32 | swap_workaround (w1[1]); t1[1] = (u64) swap_workaround (w1[2]) << 32 | swap_workaround (w1[3]); t1[2] = (u64) swap_workaround (w2[0]) << 32 | swap_workaround (w2[1]); t1[3] = (u64) swap_workaround (w2[2]) << 32 | swap_workaround (w2[3]); - u64x t2[4]; + u64 t2[4]; t2[0] = (u64) swap_workaround (w3[0]) << 32 | swap_workaround (w3[1]); t2[1] = (u64) swap_workaround (w3[2]) << 32 | swap_workaround (w3[3]); t2[2] = 0; t2[3] = 0; - u64x t3[4]; + u64 t3[4]; t3[0] = 0; t3[1] = 0; t3[2] = 0; t3[3] = (salt_len + (pw_len * 2)) * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -1227,28 +1215,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_loop (__gl if (gid >= gid_max) return; - u64x w0[4]; + u64 w0[4]; w0[0] = tmps[gid].out[0] >> 32; w0[1] = tmps[gid].out[0] << 32 | tmps[gid].out[1] >> 32; w0[2] = tmps[gid].out[1] << 32 | tmps[gid].out[2] >> 32; w0[3] = tmps[gid].out[2] << 32 | tmps[gid].out[3] >> 32; - u64x w1[4]; + u64 w1[4]; w1[0] = tmps[gid].out[3] << 32 | tmps[gid].out[4] >> 32; w1[1] = tmps[gid].out[4] << 32 | tmps[gid].out[5] >> 32; w1[2] = tmps[gid].out[5] << 32 | tmps[gid].out[6] >> 32; w1[3] = tmps[gid].out[6] << 32 | tmps[gid].out[7] >> 32; - u64x w2[4]; + u64 w2[4]; w2[0] = tmps[gid].out[7] << 32 | 0x80000000; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u64x w3[4]; + u64 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1259,7 +1247,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_loop (__gl { w0[0] = (u64) swap_workaround (j) << 32 | w0[0] & 0xffffffff; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -1370,13 +1358,13 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl if (gid >= gid_max) return; - u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 }; - u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; + u32 encryptedVerifierHashInputBlockKey[2] = { 
0xfea7d276, 0x3b4b9e79 }; + u32 encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; - u64x w0[4]; - u64x w1[4]; - u64x w2[4]; - u64x w3[4]; + u64 w0[4]; + u64 w1[4]; + u64 w2[4]; + u64 w3[4]; w0[0] = tmps[gid].out[0]; w0[1] = tmps[gid].out[1]; @@ -1395,7 +1383,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl w3[2] = 0; w3[3] = (64 + 8) * 8; - u64x digest0[8]; + u64 digest0[8]; digest0[0] = SHA512M_A; digest0[1] = SHA512M_B; @@ -1425,7 +1413,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl w3[2] = 0; w3[3] = (64 + 8) * 8; - u64x digest1[8]; + u64 digest1[8]; digest1[0] = SHA512M_A; digest1[1] = SHA512M_B; @@ -1440,8 +1428,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl // now we got the AES key, decrypt the verifier - u32x rek[60]; - u32x rdk[60]; + u32 rek[60]; + u32 rdk[60]; u32 data[4]; @@ -1450,7 +1438,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl data[2] = office2013_bufs[salt_pos].encryptedVerifier[2]; data[3] = office2013_bufs[salt_pos].encryptedVerifier[3]; - u32x ukeyx[8]; + u32 ukeyx[8]; ukeyx[0] = h32_from_64 (digest0[0]); ukeyx[1] = l32_from_64 (digest0[0]); @@ -1495,7 +1483,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl w3[2] = 0; w3[3] = 16 * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA512M_A; digest[1] = SHA512M_B; @@ -1528,12 +1516,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09600_comp (__gl AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m09700_a0.cl b/OpenCL/m09700_a0.cl similarity index 92% rename from 
amd/m09700_a0.cl rename to OpenCL/m09700_a0.cl index 4a44ac2..79c9bc7 100644 --- a/amd/m09700_a0.cl +++ b/OpenCL/m09700_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -166,29 +142,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t 
= w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -264,12 +240,12 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) +static void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4]) { - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[2]; + u32 digest_t1[2]; + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = digest_pre[0]; digest_t0[1] = digest_pre[1] & 0xff; @@ -311,10 +287,10 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; salt_buf_t3[4] = salt_buf[3] >> 8; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // generate the 16 * 21 buffer @@ -628,14 +604,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -674,28 +650,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + 
u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -706,17 +682,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -730,7 +706,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -767,7 +743,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -776,7 +752,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -806,12 +782,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -843,14 +819,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -901,28 +877,28 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -933,17 +909,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -957,7 +933,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -994,7 +970,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -1003,7 +979,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -1033,12 +1009,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09700_a1.cl b/OpenCL/m09700_a1.cl 
similarity index 92% rename from amd/m09700_a1.cl rename to OpenCL/m09700_a1.cl index 9343056..4ef61a2 100644 --- a/amd/m09700_a1.cl +++ b/OpenCL/m09700_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -164,29 +140,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + 
u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -262,12 +238,12 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) +static void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4]) { - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[2]; + u32 digest_t1[2]; + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = digest_pre[0]; digest_t0[1] = digest_pre[1] & 0xff; @@ -309,10 +285,10 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; salt_buf_t3[4] = salt_buf[3] >> 8; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // generate the 16 * 21 buffer @@ -626,28 +602,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -740,28 +716,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = 
wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -770,17 +746,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -794,7 +770,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -831,7 +807,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -840,7 +816,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -870,12 +846,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -907,28 
+883,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -1021,28 +997,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -1051,17 +1027,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -1075,7 +1051,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -1112,7 +1088,7 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -1121,7 +1097,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -1151,12 +1127,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09700_a3.cl b/OpenCL/m09700_a3.cl similarity index 90% rename from amd/m09700_a3.cl rename to OpenCL/m09700_a3.cl index fd0b106..c410c46 100644 --- a/amd/m09700_a3.cl +++ b/OpenCL/m09700_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -159,29 +140,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], 
u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -257,7 +238,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09700m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -319,7 +300,7 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -327,10 +308,10 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -349,10 +330,10 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = pw_len * 8; w3_t[3] = 0; - u32x digest_t0[4]; - u32x digest_t1[2]; // need only first 5 byte - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[4]; + u32 digest_t1[2]; // need only first 5 byte + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = MD5M_A; digest_t0[1] = MD5M_B; @@ -363,7 +344,7 @@ static void m09700m 
(__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // prepare 16 * 21 buffer stuff - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -705,7 +686,7 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -714,7 +695,7 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -744,16 +725,16 @@ static void m09700m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09700s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], 
u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -827,7 +808,7 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -835,10 +816,10 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -857,10 +838,10 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = pw_len * 8; w3_t[3] = 0; - u32x digest_t0[4]; - u32x digest_t1[2]; // need only first 5 byte - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[4]; + u32 digest_t1[2]; // need only first 5 byte + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = MD5M_A; digest_t0[1] = MD5M_B; @@ -871,7 +852,7 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // prepare 16 * 21 buffer stuff - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -1213,7 +1194,7 @@ static void m09700s 
(__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -1222,7 +1203,7 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -1252,12 +1233,12 @@ static void m09700s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -1271,28 +1252,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1320,28 +1301,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1369,28 +1350,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; 
w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1418,28 +1399,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1467,28 +1448,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1516,28 +1497,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09700_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m09710_a0.cl b/OpenCL/m09710_a0.cl similarity index 90% rename from amd/m09710_a0.cl rename to OpenCL/m09710_a0.cl index 448a628..b6aa2cb 100644 --- 
a/amd/m09710_a0.cl +++ b/OpenCL/m09710_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -166,29 +142,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = 
w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -284,14 +260,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -319,28 +295,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -351,10 +327,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1] & 0xff; @@ -373,7 +349,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -384,7 +360,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -393,7 +369,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo rc4_init_16 
(rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -423,12 +399,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -460,14 +436,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -507,28 +483,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -539,10 +515,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1] & 0xff; @@ -561,7 +537,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -572,7 +548,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo // now 
the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -581,7 +557,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -611,12 +587,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09710_a1.cl b/OpenCL/m09710_a1.cl similarity index 91% rename from amd/m09710_a1.cl rename to OpenCL/m09710_a1.cl index 87acd45..b3d9ffe 100644 --- a/amd/m09710_a1.cl +++ b/OpenCL/m09710_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -164,29 +140,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const 
u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -282,28 +258,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -375,10 +351,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wordl0[0] | 
wordr0[0]; w0_t[1] = (wordl0[1] | wordr0[1]) & 0xff; @@ -397,7 +373,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -408,7 +384,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -417,7 +393,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -447,12 +423,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -484,28 +460,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -589,10 +565,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = wordl0[0] | wordr0[0]; w0_t[1] = (wordl0[1] | wordr0[1]) & 0xff; @@ 
-611,7 +587,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -622,7 +598,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -631,7 +607,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -661,12 +637,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09710_a3.cl b/OpenCL/m09710_a3.cl similarity index 83% rename from amd/m09710_a3.cl rename to OpenCL/m09710_a3.cl index 326af7a..43eda72 100644 --- a/amd/m09710_a3.cl +++ b/OpenCL/m09710_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef 
struct { @@ -159,29 +140,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -257,7 +238,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, 
__global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09710m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -285,7 +266,7 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -295,10 +276,10 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1] & 0xff; @@ -317,7 +298,7 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = 
MD5M_A; digest[1] = MD5M_B; @@ -328,7 +309,7 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -337,7 +318,7 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -367,16 +348,16 @@ static void m09710m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09710s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global 
void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -416,7 +397,7 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -426,10 +407,10 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // first md5 to generate RC4 128 bit key - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1] & 0xff; @@ -448,7 +429,7 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 9 * 8; w3_t[3] = 0; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -459,7 +440,7 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1]; @@ -468,7 +449,7 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -498,12 +479,12 @@ static void m09710s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const 
u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -517,28 +498,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -574,28 +555,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09710_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; diff --git a/amd/m09720_a0.cl b/OpenCL/m09720_a0.cl similarity index 90% rename from amd/m09720_a0.cl rename to OpenCL/m09720_a0.cl index 43168f9..a27a9a1 100644 --- a/amd/m09720_a0.cl +++ b/OpenCL/m09720_a0.cl @@ -8,67 +8,43 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -144,12 +120,12 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) +static void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4]) { - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[2]; + u32 digest_t1[2]; + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = digest_pre[0]; digest_t0[1] = digest_pre[1] & 0xff; @@ -191,10 
+167,10 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; salt_buf_t3[4] = salt_buf[3] >> 8; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // generate the 16 * 21 buffer @@ -504,14 +480,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -537,28 +513,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -569,17 +545,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -593,7 +569,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -602,15 +578,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo gen336 (digest_pre, 
salt_buf, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -638,14 +614,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -683,28 +659,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -715,17 +691,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = out_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -739,7 +715,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -748,15 +724,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo gen336 (digest_pre, 
salt_buf, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09720_a1.cl b/OpenCL/m09720_a1.cl similarity index 91% rename from amd/m09720_a1.cl rename to OpenCL/m09720_a1.cl index 65fe0ff..44d95a8 100644 --- a/amd/m09720_a1.cl +++ b/OpenCL/m09720_a1.cl @@ -8,65 +8,41 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - 
u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -142,12 +118,12 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) +static void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4]) { - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[2]; + u32 digest_t1[2]; + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = digest_pre[0]; digest_t0[1] = digest_pre[1] & 0xff; @@ -189,10 +165,10 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; salt_buf_t3[4] = salt_buf[3] >> 8; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // generate the 16 * 21 buffer @@ -502,28 +478,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 
wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -591,28 +567,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -621,17 +597,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -645,7 +621,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -654,15 +630,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo gen336 (digest_pre, salt_buf, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -690,28 +666,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo if (gid >= 
gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -791,28 +767,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -821,17 +797,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); w3_t[2] = pw_len * 8 * 2; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -845,7 +821,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -854,15 +830,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo gen336 (digest_pre, salt_buf, digest); 
- u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09720_a3.cl b/OpenCL/m09720_a3.cl similarity index 85% rename from amd/m09720_a3.cl rename to OpenCL/m09720_a3.cl index 491ef28..bd624b3 100644 --- a/amd/m09720_a3.cl +++ b/OpenCL/m09720_a3.cl @@ -8,65 +8,41 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; 
- u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -142,12 +118,12 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) +static void gen336 (u32 digest_pre[4], u32 salt_buf[4], u32 digest[4]) { - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; + u32 digest_t0[2]; + u32 digest_t1[2]; + u32 digest_t2[2]; + u32 digest_t3[2]; digest_t0[0] = digest_pre[0]; digest_t0[1] = digest_pre[1] & 0xff; @@ -189,10 +165,10 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; salt_buf_t3[4] = salt_buf[3] >> 8; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // generate the 16 * 21 buffer @@ -486,7 +462,7 @@ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) md5_transform (w0_t, w1_t, w2_t, w3_t, digest); } -static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 
*bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09720m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -510,7 +486,7 @@ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -518,10 +494,10 @@ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -540,7 +516,7 @@ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const 
u32 p w3_t[2] = pw_len * 8; w3_t[3] = 0; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -554,7 +530,7 @@ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -563,19 +539,19 @@ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p gen336 (digest_pre, salt_buf, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09720s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, 
__global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice01_t *oldoffice01_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -611,7 +587,7 @@ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -619,10 +595,10 @@ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0[0]; w0_t[1] = w0[1]; @@ -641,7 +617,7 @@ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = pw_len * 8; w3_t[3] = 0; - u32x digest_pre[4]; + u32 digest_pre[4]; digest_pre[0] = MD5M_A; digest_pre[1] = MD5M_B; @@ -655,7 +631,7 @@ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p digest_pre[2] &= 0x00000000; digest_pre[3] &= 0x00000000; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -664,15 +640,15 @@ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p gen336 (digest_pre, salt_buf, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 
r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -686,28 +662,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -733,28 +709,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -780,28 +756,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -827,28 +803,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; 
w3[0] = 0; w3[1] = 0; @@ -874,28 +850,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -921,28 +897,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09720_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m09800_a0.cl b/OpenCL/m09800_a0.cl similarity index 92% rename from amd/m09800_a0.cl rename to OpenCL/m09800_a0.cl index 63810b6..f88f368 100644 --- a/amd/m09800_a0.cl +++ b/OpenCL/m09800_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -166,30 +142,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -314,14 +290,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -362,28 +338,28 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m09800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -396,10 +372,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -423,7 +399,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -458,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -474,7 +450,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -510,12 +486,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -547,14 +523,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; 
pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -607,28 +583,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -641,10 +617,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -668,7 +644,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -703,7 +679,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -719,7 +695,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -755,12 +731,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const 
u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09800_a1.cl b/OpenCL/m09800_a1.cl similarity index 93% rename from amd/m09800_a1.cl rename to OpenCL/m09800_a1.cl index 3544fea..76b94f5 100644 --- a/amd/m09800_a1.cl +++ b/OpenCL/m09800_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -164,30 +140,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = 
w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -312,28 +288,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -418,28 +394,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -448,10 +424,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x 
w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -475,7 +451,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -510,7 +486,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -526,7 +502,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -562,12 +538,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -599,28 +575,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -717,28 +693,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + 
u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -747,10 +723,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -774,7 +750,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -809,7 +785,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -825,7 +801,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -861,12 +837,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09800_a3.cl b/OpenCL/m09800_a3.cl 
similarity index 88% rename from amd/m09800_a3.cl rename to OpenCL/m09800_a3.cl index ee30da1..1541e50 100644 --- a/amd/m09800_a3.cl +++ b/OpenCL/m09800_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -159,30 +140,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = 
w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -287,7 +268,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09800m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -330,7 +311,7 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -338,10 +319,10 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf[0]; w0_t[1] = salt_buf[1]; @@ -360,7 +341,7 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -395,7 +376,7 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -411,7 +392,7 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -447,16 +428,16 @@ static void m09800m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t 
*bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09800s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -511,7 +492,7 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -519,10 +500,10 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; 
- u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf[0]; w0_t[1] = salt_buf[1]; @@ -541,7 +522,7 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -576,7 +557,7 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x key[4]; + u32 key[4]; key[0] = swap_workaround (digest[0]); key[1] = swap_workaround (digest[1]); @@ -592,7 +573,7 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); @@ -628,12 +609,12 @@ static void m09800s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -647,28 +628,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -696,28 +677,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 
pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -745,28 +726,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -794,28 +775,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -843,28 +824,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -892,28 +873,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09800_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 
w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m09810_a0.cl b/OpenCL/m09810_a0.cl similarity index 91% rename from amd/m09810_a0.cl rename to OpenCL/m09810_a0.cl index c118734..45fb4ee 100644 --- a/amd/m09810_a0.cl +++ b/OpenCL/m09810_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -166,30 +142,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - 
u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -314,14 +290,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -349,28 +325,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -379,7 +355,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1] & 0xff; @@ -388,14 +364,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; 
u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -414,7 +390,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -431,12 +407,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -468,14 +444,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -515,28 +491,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -545,7 +521,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1] & 0xff; @@ -554,14 +530,14 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -580,7 +556,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -597,12 +573,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09810_a1.cl b/OpenCL/m09810_a1.cl similarity index 92% rename from amd/m09810_a1.cl rename to OpenCL/m09810_a1.cl index 7d6a7ab..8458139 100644 --- a/amd/m09810_a1.cl +++ b/OpenCL/m09810_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -164,30 +140,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -312,28 +288,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -403,9 +379,9 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; - u32x key[4]; + u32 key[4]; key[0] = wordl0[0] | wordr0[0]; key[1] = (wordl0[1] | wordr0[1]) & 0xff; @@ -414,14 +390,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -440,7 +416,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -457,12 +433,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -494,28 +470,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -597,9 +573,9 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo 
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; - u32x key[4]; + u32 key[4]; key[0] = wordl0[0] | wordr0[0]; key[1] = (wordl0[1] | wordr0[1]) & 0xff; @@ -608,14 +584,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -634,7 +610,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -651,12 +627,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09810_a3.cl b/OpenCL/m09810_a3.cl similarity index 86% rename from amd/m09810_a3.cl rename to OpenCL/m09810_a3.cl index 4d0c5a5..8d69709 100644 --- a/amd/m09810_a3.cl +++ b/OpenCL/m09810_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef 
VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" typedef struct { @@ -159,30 +140,30 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -287,7 +268,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09810m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -315,7 +296,7 @@ static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -323,7 +304,7 @@ static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1] & 0xff; @@ -332,14 +313,14 @@ static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, 
key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -358,7 +339,7 @@ static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -375,16 +356,16 @@ static void m09810m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09810s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -424,7 +405,7 @@ static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -432,7 +413,7 @@ static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1] & 0xff; @@ -441,14 +422,14 @@ static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (out[0]); w0_t[1] = swap_workaround (out[1]); @@ -467,7 +448,7 @@ static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = 0; w3_t[3] = 16 * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -484,12 +465,12 @@ static void m09810s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_next_16 (rc4_key, 16, j, digest, out); - const u32x r0 = out[0]; - const u32x r1 = 
out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -503,28 +484,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -552,28 +533,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -601,28 +582,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -650,28 +631,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -699,28 +680,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -748,28 +729,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09810_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m09820_a0.cl b/OpenCL/m09820_a0.cl similarity index 90% rename from amd/m09820_a0.cl rename to OpenCL/m09820_a0.cl index 1221279..d6e3f12 100644 --- a/amd/m09820_a0.cl +++ b/OpenCL/m09820_a0.cl @@ -8,68 +8,44 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -190,14 +166,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -227,28 +203,28 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -261,10 +237,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -288,7 +264,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -323,15 +299,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -359,14 +335,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -408,28 +384,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m09820_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -442,10 +418,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo append_0x80_2 (w0, w1, out_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -469,7 +445,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -504,15 +480,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09820_a1.cl b/OpenCL/m09820_a1.cl similarity index 91% rename from amd/m09820_a1.cl rename to OpenCL/m09820_a1.cl index f5c5826..217e6e4 100644 --- a/amd/m09820_a1.cl +++ b/OpenCL/m09820_a1.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include 
"include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -188,28 +164,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] 
= pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -281,28 +257,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -311,10 +287,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -338,7 +314,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -373,15 +349,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 
r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -409,28 +385,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -514,28 +490,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -544,10 +520,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo append_0x80_2 (w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; make_unicode (w0, w0_t, w1_t); make_unicode (w1, w2_t, w3_t); @@ -571,7 +547,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -606,15 +582,15 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09820_a3.cl b/OpenCL/m09820_a3.cl similarity index 84% rename from amd/m09820_a3.cl rename to OpenCL/m09820_a3.cl index 0bddfce..c9b32c4 100644 --- a/amd/m09820_a3.cl +++ b/OpenCL/m09820_a3.cl @@ -8,66 +8,42 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = 
w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -172,7 +148,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09820m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, 
__global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -200,7 +176,7 @@ static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -208,10 +184,10 @@ static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf[0]; w0_t[1] = salt_buf[1]; @@ -230,7 +206,7 @@ static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -265,19 +241,19 @@ static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09820s (u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09820s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global oldoffice34_t *oldoffice34_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -317,7 +293,7 @@ static void m09820s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { 
@@ -325,10 +301,10 @@ static void m09820s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf[0]; w0_t[1] = salt_buf[1]; @@ -347,7 +323,7 @@ static void m09820s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = 0; w3_t[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -382,15 +358,15 @@ static void m09820s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; + u32 a = swap_workaround (digest[0]); + u32 b = swap_workaround (digest[1]) & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -404,28 +380,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -447,28 +423,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ 
-490,28 +466,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -533,28 +509,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -576,28 +552,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -619,28 +595,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09820_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = 
pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m09900_a0.cl b/OpenCL/m09900_a0.cl similarity index 94% rename from amd/m09900_a0.cl rename to OpenCL/m09900_a0.cl index 8f3c7b1..eb222bc 100644 --- a/amd/m09900_a0.cl +++ b/OpenCL/m09900_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__global pw_t *pws, __global gpu_rule_t * rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -84,28 +60,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -114,10 +90,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -192,15 +168,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -292,12 +268,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo c += r_c; d += r_d; - const u32x r0 = 
a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -325,14 +301,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -359,28 +335,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -389,10 +365,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -467,15 +443,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -570,12 +546,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo c += r_c; d += r_d; - const u32x 
r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09900_a1.cl b/OpenCL/m09900_a1.cl similarity index 94% rename from amd/m09900_a1.cl rename to OpenCL/m09900_a1.cl index 7b6e6e2..8dbeb6f 100644 --- a/amd/m09900_a1.cl +++ b/OpenCL/m09900_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -138,38 +114,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -244,15 +220,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x 
r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -344,12 +320,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -377,28 +353,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -467,38 +443,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = 
MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); @@ -573,15 +549,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -676,12 +652,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m09900_a3.cl b/OpenCL/m09900_a3.cl similarity index 77% rename from amd/m09900_a3.cl rename to OpenCL/m09900_a3.cl index 7002b99..c57c51c 100644 --- a/amd/m09900_a3.cl +++ b/OpenCL/m09900_a3.cl @@ -4,48 +4,23 @@ */ #define _MD5_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -static void m09900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -130,20 +105,18 @@ static void m09900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil 
((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -218,15 +191,15 @@ static void m09900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -318,16 +291,16 @@ static void m09900m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m09900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m09900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -424,20 +397,18 @@ static void m09900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); @@ -512,15 +483,15 @@ static void m09900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; - u32x t0[4]; - u32x 
t1[4]; - u32x t2[4]; - u32x t3[4]; + u32 t0[4]; + u32 t1[4]; + u32 t2[4]; + u32 t3[4]; t0[0] = 0; t0[1] = 0; @@ -615,16 +586,16 @@ static void m09900s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -662,7 +633,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m04 (__glo m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -700,7 +671,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m08 (__glo m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -738,7 +709,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_m16 (__glo m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -776,7 +747,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s04 (__glo m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -814,7 +785,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m09900_s08 (__glo m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m09900_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m10100_a0.cl b/OpenCL/m10100_a0.cl similarity index 87% rename from amd/m10100_a0.cl rename to OpenCL/m10100_a0.cl index 94b7102..1be7d61 100644 --- a/amd/m10100_a0.cl +++ b/OpenCL/m10100_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ @@ -96,14 +72,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -132,7 +108,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 0] = pw_buf0[0]; w[ 1] = pw_buf0[1]; @@ -151,23 +127,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo w[14] = 0; w[15] = 0; - const u32x out_len = apply_rules (rules_buf[il_pos].cmds, &w[0], &w[4], pw_len); + const u32 out_len = apply_rules (rules_buf[il_pos].cmds, &w[0], &w[4], pw_len); u64 *w_ptr = (u64 *) w; w_ptr[out_len / 8] |= (u64) out_len << 56; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; int i; int j; for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); + u64 m = hl32_to_64 (w[j + 1], w[j + 0]); v3 ^= m; @@ -184,17 +160,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; + const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -222,14 +198,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -270,7 +246,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 0] = pw_buf0[0]; w[ 1] 
= pw_buf0[1]; @@ -289,23 +265,23 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo w[14] = 0; w[15] = 0; - const u32x out_len = apply_rules (rules_buf[il_pos].cmds, &w[0], &w[4], pw_len); + const u32 out_len = apply_rules (rules_buf[il_pos].cmds, &w[0], &w[4], pw_len); u64 *w_ptr = (u64 *) w; w_ptr[out_len / 8] |= (u64) out_len << 56; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; int i; int j; for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); + u64 m = hl32_to_64 (w[j + 1], w[j + 0]); v3 ^= m; @@ -322,17 +298,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; + const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10100_a1.cl b/OpenCL/m10100_a1.cl similarity index 90% rename from amd/m10100_a1.cl rename to OpenCL/m10100_a1.cl index 4818202..9461036 100644 --- a/amd/m10100_a1.cl +++ b/OpenCL/m10100_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" 
-#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ @@ -94,28 +70,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -186,7 +162,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -209,17 +185,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo w_ptr[pw_len / 8] |= (u64) pw_len << 56; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; int i; int j; for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); + u64 m = hl32_to_64 (w[j + 1], w[j + 0]); v3 ^= m; @@ -236,17 +212,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; + const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = 
l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -274,28 +250,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -378,7 +354,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -401,17 +377,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo w_ptr[pw_len / 8] |= (u64) pw_len << 56; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; int i; int j; for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); + u64 m = hl32_to_64 (w[j + 1], w[j + 0]); v3 ^= m; @@ -428,17 +404,17 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; + const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const 
u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10100_a3.cl b/OpenCL/m10100_a3.cl similarity index 56% rename from amd/m10100_a3.cl rename to OpenCL/m10100_a3.cl index acbcc39..10f4907 100644 --- a/amd/m10100_a3.cl +++ b/OpenCL/m10100_a3.cl @@ -4,46 +4,21 @@ */ #define _SIPHASH_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define SIPROUND(v0,v1,v2,v3) \ @@ -79,7 +54,7 @@ (v2) = rotl64 ((v2), 32); #endif -static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -110,22 +85,20 @@ static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; - u64x m = hl32_to_64 (w[1], w0); + u64 m = hl32_to_64 (w[1], w0); v3 ^= m; @@ -156,21 +129,21 @@ static void m10100m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; 
+ const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -213,22 +186,20 @@ static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; + u64 v0 = v0p; + u64 v1 = v1p; + u64 v2 = v2p; + u64 v3 = v3p; - u64x m = hl32_to_64 (w[1], w0); + u64 m = hl32_to_64 (w[1], w0); v3 ^= m; @@ -259,21 +230,21 @@ static void m10100s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g SIPROUND (v0, v1, v2, v3); SIPROUND (v0, v1, v2, v3); - const u64x v = v0 ^ v1 ^ v2 ^ v3; + const u64 v = v0 ^ v1 ^ v2 ^ v3; - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); + const u32 a = l32_from_64 (v); + const u32 b = h32_from_64 (v); - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global 
u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -311,7 +282,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m04 (__glo m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 
*bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -349,7 +320,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m08 (__glo m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) { /** * base @@ -387,7 +358,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_m16 (__glo m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global 
digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -425,7 +396,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s04 (__glo m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, 
__global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -463,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s08 (__glo m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, 
__global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10100_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m10300.cl b/OpenCL/m10300.cl similarity index 91% rename from amd/m10300.cl rename to OpenCL/m10300.cl index ee5c2ab..355d807 100644 --- a/amd/m10300.cl +++ b/OpenCL/m10300.cl @@ -8,63 +8,51 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M 
"check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -179,21 +167,21 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_init (__gl if (gid >= gid_max) return; - u32x word_buf0[4]; + u32 word_buf0[4]; word_buf0[0] = pws[gid].i[0]; word_buf0[1] = pws[gid].i[1]; word_buf0[2] = pws[gid].i[2]; word_buf0[3] = pws[gid].i[3]; - u32x word_buf1[4]; + u32 word_buf1[4]; word_buf1[0] = pws[gid].i[4]; word_buf1[1] = pws[gid].i[5]; word_buf1[2] = pws[gid].i[6]; word_buf1[3] = pws[gid].i[7]; - u32x word_buf2[2]; + u32 word_buf2[2]; word_buf2[0] = pws[gid].i[8]; word_buf2[1] = pws[gid].i[9]; @@ -217,28 +205,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_init (__gl * 
init */ - u32x w0[4]; + u32 w0[4]; w0[0] = salt_buf[0]; w0[1] = salt_buf[1]; w0[2] = salt_buf[2]; w0[3] = salt_buf[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -286,7 +274,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = pw_salt_len * 8; - u32x digest[5]; + u32 digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -311,28 +299,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_loop (__gl const u32 gid = get_global_id (0); - u32x word_buf0[4]; + u32 word_buf0[4]; word_buf0[0] = swap_workaround (pws[gid].i[0]); word_buf0[1] = swap_workaround (pws[gid].i[1]); word_buf0[2] = swap_workaround (pws[gid].i[2]); word_buf0[3] = swap_workaround (pws[gid].i[3]); - u32x word_buf1[4]; + u32 word_buf1[4]; word_buf1[0] = swap_workaround (pws[gid].i[4]); word_buf1[1] = swap_workaround (pws[gid].i[5]); word_buf1[2] = swap_workaround (pws[gid].i[6]); word_buf1[3] = swap_workaround (pws[gid].i[7]); - u32x word_buf2[2]; + u32 word_buf2[2]; word_buf2[0] = swap_workaround (pws[gid].i[8]); word_buf2[1] = swap_workaround (pws[gid].i[9]); const u32 pw_len = pws[gid].pw_len; - u32x digest[5]; + u32 digest[5]; digest[0] = tmps[gid].digest_buf[0]; digest[1] = tmps[gid].digest_buf[1]; @@ -346,10 +334,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_loop (__gl for (u32 i = 0; i < loop_cnt; i++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = digest[0]; w0[1] = digest[1]; @@ -418,12 +406,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10300_comp (__gl * digest */ - const u32x r0 = tmps[gid].digest_buf[0]; - const u32x r1 = tmps[gid].digest_buf[1]; - const u32x r2 = tmps[gid].digest_buf[2]; - const u32x r3 = 
tmps[gid].digest_buf[3]; + const u32 r0 = tmps[gid].digest_buf[0]; + const u32 r1 = tmps[gid].digest_buf[1]; + const u32 r2 = tmps[gid].digest_buf[2]; + const u32 r3 = tmps[gid].digest_buf[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m10400_a0.cl b/OpenCL/m10400_a0.cl similarity index 91% rename from amd/m10400_a0.cl rename to OpenCL/m10400_a0.cl index 3906a00..0b9e38f 100644 --- a/amd/m10400_a0.cl +++ b/OpenCL/m10400_a0.cl @@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -155,29 +136,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 in[4 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; 
- u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -269,14 +250,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -323,28 +304,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -353,10 +334,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -400,7 +381,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 
digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -430,7 +411,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -439,16 +420,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -476,14 +457,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -542,28 +523,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -572,10 +553,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -619,7 +600,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m10400_s04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -649,7 +630,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -658,16 +639,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10400_a1.cl b/OpenCL/m10400_a1.cl similarity index 92% rename from amd/m10400_a1.cl rename to OpenCL/m10400_a1.cl index 7347f4b..2acdda6 100644 --- a/amd/m10400_a1.cl +++ b/OpenCL/m10400_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -153,29 +134,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 in[4 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], 
const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -267,28 +248,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -377,38 +358,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - 
u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -452,7 +433,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -482,7 +463,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -491,16 +472,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -528,28 +509,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] 
= 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -650,38 +631,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -725,7 +706,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -755,7 +736,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -764,16 +745,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10400_a3.cl b/OpenCL/m10400_a3.cl similarity index 85% rename from amd/m10400_a3.cl rename to 
OpenCL/m10400_a3.cl index dffc738..8431275 100644 --- a/amd/m10400_a3.cl +++ b/OpenCL/m10400_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -153,29 +134,29 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 in[4 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 
wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -251,7 +232,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10400m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -290,7 +271,7 @@ static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -298,10 +279,10 @@ static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -345,7 +326,7 @@ static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -375,7 +356,7 @@ static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -384,20 +365,20 @@ static void m10400m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 
*bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10400s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -448,7 +429,7 @@ static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -456,10 +437,10 @@ static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -503,7 +484,7 @@ static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w3_t[2] = o_buf[6]; 
w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -533,7 +514,7 @@ static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = digest[0]; key[1] = digest[1] & 0xff; @@ -542,16 +523,16 @@ static void m10400s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -565,28 +546,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -614,28 +595,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -663,28 +644,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; 
w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -712,28 +693,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -761,28 +742,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -810,28 +791,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10400_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m10410_a0.cl b/OpenCL/m10410_a0.cl similarity index 91% rename from amd/m10410_a0.cl rename to OpenCL/m10410_a0.cl index 3fa9386..8aeb169 100644 --- a/amd/m10410_a0.cl +++ b/OpenCL/m10410_a0.cl 
@@ -8,38 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -170,14 +151,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -199,28 +180,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -231,7 +212,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -240,16 +221,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 
1))) m10410_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -277,14 +258,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -318,28 +299,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -350,7 +331,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -359,16 +340,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10410_a1.cl b/OpenCL/m10410_a1.cl similarity index 92% rename from amd/m10410_a1.cl rename to 
OpenCL/m10410_a1.cl index daf9d9a..be447e7 100644 --- a/amd/m10410_a1.cl +++ b/OpenCL/m10410_a1.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -168,28 +149,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -249,14 +230,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[2]; + u32 w0[2]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -265,16 +246,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 
(rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -302,28 +283,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -395,14 +376,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[2]; + u32 w0[2]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -411,16 +392,16 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10410_a3.cl b/OpenCL/m10410_a3.cl similarity index 83% rename from amd/m10410_a3.cl rename to OpenCL/m10410_a3.cl index a1346cd..e2de9d7 100644 --- a/amd/m10410_a3.cl +++ b/OpenCL/m10410_a3.cl @@ -8,36 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define 
VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -152,7 +133,7 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, __constant u32 in[4 return j; } -static void m10410m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10410m (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global 
u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -167,7 +148,7 @@ static void m10410m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -177,7 +158,7 @@ static void m10410m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -186,20 +167,20 @@ static void m10410m (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m10410s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, 
__global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10410s (__local RC4_KEY rc4_keys[64], u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -226,7 +207,7 @@ static void m10410s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -236,7 +217,7 @@ static void m10410s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x // now the RC4 part - u32x key[4]; + u32 key[4]; key[0] = w0[0]; key[1] = w0[1]; @@ -245,16 +226,16 @@ static void m10410s (__local RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x rc4_init_16 (rc4_key, key); - u32x out[4]; + u32 out[4]; rc4_next_16 (rc4_key, 0, 0, padding, out); - const u32x r0 = out[0]; - const u32x r1 = 
out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = out[2]; + const u32 r3 = out[3]; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -268,28 +249,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -317,28 +298,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -366,28 +347,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -415,28 +396,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + 
u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -464,28 +445,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -513,28 +494,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10410_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m10420_a0.cl b/OpenCL/m10420_a0.cl similarity index 89% rename from amd/m10420_a0.cl rename to OpenCL/m10420_a0.cl index cd10d8f..65f17c5 100644 --- a/amd/m10420_a0.cl +++ b/OpenCL/m10420_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -58,29 +34,29 @@ __constant u32 padding[8] = 0x7a695364 }; -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -172,14 +148,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - 
u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -218,28 +194,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -248,10 +224,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -295,7 +271,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -323,15 +299,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -359,14 +335,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -417,28 
+393,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -447,10 +423,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -494,7 +470,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -522,15 +498,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10420_a1.cl b/OpenCL/m10420_a1.cl similarity index 90% rename from amd/m10420_a1.cl rename to OpenCL/m10420_a1.cl index 64d3932..d382b65 100644 --- a/amd/m10420_a1.cl +++ b/OpenCL/m10420_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 
#include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -56,29 +32,29 @@ __constant u32 padding[8] = 0x7a695364 }; -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -170,28 +146,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo if (gid >= gid_max) 
return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -274,38 +250,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -349,7 +325,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -377,15 +353,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + 
const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -413,28 +389,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -529,38 +505,38 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; w3[2] = wordl3[2] | wordr3[2]; w3[3] = wordl3[3] | wordr3[3]; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -604,7 +580,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -632,15 +608,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = 
digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10420_a3.cl b/OpenCL/m10420_a3.cl similarity index 83% rename from amd/m10420_a3.cl rename to OpenCL/m10420_a3.cl index fabdabf..f7db026 100644 --- a/amd/m10420_a3.cl +++ b/OpenCL/m10420_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 padding[8] = { @@ -56,29 +32,29 @@ __constant u32 padding[8] = 0x7a695364 }; -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - 
u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -154,7 +130,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10420m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 
*bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -191,7 +167,7 @@ static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -199,10 +175,10 @@ static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -246,7 +222,7 @@ static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -274,19 +250,19 @@ static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10420s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global pdf_t *pdf_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -335,7 +311,7 @@ static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -343,10 +319,10 @@ static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 
w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -390,7 +366,7 @@ static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -418,15 +394,15 @@ static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - u32x a = digest[0]; - u32x b = digest[1] & 0xff; + u32 a = digest[0]; + u32 b = digest[1] & 0xff; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -440,28 +416,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -487,28 +463,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -534,28 +510,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = 
pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -581,28 +557,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -628,28 +604,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -675,28 +651,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10420_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m10500.cl b/OpenCL/m10500.cl similarity index 92% rename from amd/m10500.cl rename to OpenCL/m10500.cl index 931b55d..3048b19 100644 --- a/amd/m10500.cl +++ b/OpenCL/m10500.cl @@ -8,33 +8,21 @@ #include 
"include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif __constant u32 padding[8] = @@ -169,31 +157,31 @@ static u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u3 return j; } -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -280,21 +268,21 
@@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; @@ -355,8 +343,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_init (__gl u32 final_length = 68 + id_len; - u32x w11 = 0x80; - u32x w12 = 0; + u32 w11 = 0x80; + u32 w12 = 0; if (pdf_bufs[salt_pos].enc_md != 1) { @@ -373,10 +361,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_init (__gl * main init */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; // max length supported by pdf11 is 32 @@ -420,7 +408,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_init (__gl w3_t[2] = o_buf[6]; w3_t[3] = o_buf[7]; - u32x digest[4]; + u32 digest[4]; digest[0] = MD5M_A; digest[1] = MD5M_B; @@ -482,14 +470,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_loop (__gl * loop */ - u32x digest[4]; + u32 digest[4]; digest[0] = tmps[gid].digest[0]; digest[1] = tmps[gid].digest[1]; digest[2] = tmps[gid].digest[2]; digest[3] = tmps[gid].digest[3]; - u32x out[4]; + u32 out[4]; out[0] = tmps[gid].out[0]; out[1] = tmps[gid].out[1]; @@ -500,10 +488,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_loop (__gl { if (j < 50) { - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = digest[0]; w0_t[1] = digest[1]; @@ -538,7 +526,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_loop (__gl | x << 16 | x << 24; - u32x tmp[4]; + u32 tmp[4]; tmp[0] = digest[0] ^ xv; tmp[1] 
= digest[1] ^ xv; @@ -578,12 +566,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10500_comp (__gl * digest */ - const u32x r0 = tmps[gid].out[0]; - const u32x r1 = tmps[gid].out[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = tmps[gid].out[0]; + const u32 r1 = tmps[gid].out[1]; + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m10700.cl b/OpenCL/m10700.cl similarity index 94% rename from amd/m10700.cl rename to OpenCL/m10700.cl index b38305c..94ba2ca 100644 --- a/amd/m10700.cl +++ b/OpenCL/m10700.cl @@ -8,33 +8,21 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" +#define COMPARE_M "check_multi_vect4_comp4.c" #endif typedef struct @@ -77,33 +65,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround 
(w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = swap_workaround (w3[2]); - u32x wf_t = swap_workaround (w3[3]); + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = swap_workaround (w3[2]); + u32 wf_t = swap_workaround (w3[3]); #define ROUND256_EXPAND() \ { \ @@ -187,33 +175,33 @@ __constant u64 k_sha384[80] = SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, }; -static void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64x digest[8]) +static void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = swap_workaround (w0[0]); - u64x w1_t = swap_workaround (w0[1]); - u64x w2_t = swap_workaround (w0[2]); - u64x w3_t = swap_workaround (w0[3]); - u64x w4_t = swap_workaround (w1[0]); - u64x w5_t = swap_workaround (w1[1]); - u64x 
w6_t = swap_workaround (w1[2]); - u64x w7_t = swap_workaround (w1[3]); - u64x w8_t = swap_workaround (w2[0]); - u64x w9_t = swap_workaround (w2[1]); - u64x wa_t = swap_workaround (w2[2]); - u64x wb_t = swap_workaround (w2[3]); - u64x wc_t = swap_workaround (w3[0]); - u64x wd_t = swap_workaround (w3[1]); - u64x we_t = swap_workaround (w3[2]); - u64x wf_t = swap_workaround (w3[3]); + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; + + u64 w0_t = swap_workaround (w0[0]); + u64 w1_t = swap_workaround (w0[1]); + u64 w2_t = swap_workaround (w0[2]); + u64 w3_t = swap_workaround (w0[3]); + u64 w4_t = swap_workaround (w1[0]); + u64 w5_t = swap_workaround (w1[1]); + u64 w6_t = swap_workaround (w1[2]); + u64 w7_t = swap_workaround (w1[3]); + u64 w8_t = swap_workaround (w2[0]); + u64 w9_t = swap_workaround (w2[1]); + u64 wa_t = swap_workaround (w2[2]); + u64 wb_t = swap_workaround (w2[3]); + u64 wc_t = swap_workaround (w3[0]); + u64 wd_t = swap_workaround (w3[1]); + u64 we_t = swap_workaround (w3[2]); + u64 wf_t = swap_workaround (w3[3]); #define ROUND384_EXPAND() \ { \ @@ -299,31 +287,31 @@ __constant u64 k_sha512[80] = static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) { - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = swap_workaround (w0[0]); - u64x w1_t = swap_workaround (w0[1]); - u64x w2_t = swap_workaround (w0[2]); - u64x w3_t = swap_workaround (w0[3]); - u64x w4_t = swap_workaround (w1[0]); - u64x w5_t = swap_workaround (w1[1]); - u64x w6_t = swap_workaround (w1[2]); - u64x w7_t = swap_workaround (w1[3]); - u64x w8_t = swap_workaround (w2[0]); - u64x w9_t = swap_workaround (w2[1]); - u64x wa_t = swap_workaround (w2[2]); - u64x wb_t = swap_workaround 
(w2[3]); - u64x wc_t = swap_workaround (w3[0]); - u64x wd_t = swap_workaround (w3[1]); - u64x we_t = swap_workaround (w3[2]); - u64x wf_t = swap_workaround (w3[3]); + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; + + u64 w0_t = swap_workaround (w0[0]); + u64 w1_t = swap_workaround (w0[1]); + u64 w2_t = swap_workaround (w0[2]); + u64 w3_t = swap_workaround (w0[3]); + u64 w4_t = swap_workaround (w1[0]); + u64 w5_t = swap_workaround (w1[1]); + u64 w6_t = swap_workaround (w1[2]); + u64 w7_t = swap_workaround (w1[3]); + u64 w8_t = swap_workaround (w2[0]); + u64 w9_t = swap_workaround (w2[1]); + u64 wa_t = swap_workaround (w2[2]); + u64 wb_t = swap_workaround (w2[3]); + u64 wc_t = swap_workaround (w3[0]); + u64 wd_t = swap_workaround (w3[1]); + u64 we_t = swap_workaround (w3[2]); + u64 wf_t = swap_workaround (w3[3]); #define ROUND512_EXPAND() \ { \ @@ -842,7 +830,7 @@ static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, __local u32 out[3] = swap_workaround (out[3]); } -static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) +static void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2]) { switch (block_len) { @@ -1518,7 +1506,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1544,28 +1532,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_init (__gl u32 block_len = pw_len; - u32x block0[4]; + u32 block0[4]; block0[0] = w0[0]; block0[1] = w0[1]; block0[2] = w0[2]; block0[3] = w0[3]; - u32x block1[4]; + u32 block1[4]; block1[0] = 0; block1[1] = 0; block1[2] = 0; block1[3] = 0; - u32x block2[4]; + u32 block2[4]; block2[0] = 0; block2[1] = 0; 
block2[2] = 0; block2[3] = 0; - u32x block3[4]; + u32 block3[4]; block3[0] = 0; block3[1] = 0; @@ -1580,7 +1568,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_init (__gl block3[3] = swap_workaround (block_len * 8); - u32x digest[8]; + u32 digest[8]; digest[0] = SHA256M_A; digest[1] = SHA256M_B; @@ -1664,7 +1652,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_loop (__gl * base */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; @@ -1733,12 +1721,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10700_comp (__gl * digest */ - const u32x r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]); - const u32x r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]); - const u32x r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]); - const u32x r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]); + const u32 r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]); + const u32 r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]); + const u32 r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]); + const u32 r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m10800_a0.cl b/OpenCL/m10800_a0.cl similarity index 86% rename from amd/m10800_a0.cl rename to OpenCL/m10800_a0.cl index 9de9af3..59d64d9 100644 --- a/amd/m10800_a0.cl +++ b/OpenCL/m10800_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 6 #define DGST_R1 7 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" 
-#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha384[80] = { @@ -70,33 +46,33 @@ __constant u64 k_sha384[80] = SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, }; -static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha384_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; 
#define ROUND_EXPAND() \ { \ @@ -182,14 +158,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -204,28 +180,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -240,10 +216,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo * SHA384 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -262,7 +238,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo w3_t[2] = 0; w3_t[3] = out_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -275,12 +251,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -308,14 +284,14 @@ 
__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -342,28 +318,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -378,10 +354,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo * SHA384 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -400,7 +376,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo w3_t[2] = 0; w3_t[3] = out_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -413,12 +389,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10800_a1.cl b/OpenCL/m10800_a1.cl similarity index 88% rename 
from amd/m10800_a1.cl rename to OpenCL/m10800_a1.cl index 75608e5..501f378 100644 --- a/amd/m10800_a1.cl +++ b/OpenCL/m10800_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 6 #define DGST_R1 7 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha384[80] = { @@ -68,33 +44,33 @@ __constant u64 k_sha384[80] = SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, }; -static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha384_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - 
u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -180,28 +156,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -262,10 +238,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -288,10 +264,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo * SHA384 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -310,7 +286,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m10800_m04 (__glo w3_t[2] = 0; w3_t[3] = pw_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -323,12 +299,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -356,28 +332,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -450,10 +426,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -476,10 +452,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo * SHA384 */ - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = swap_workaround (w0[0]); w0_t[1] = swap_workaround (w0[1]); @@ -498,7 +474,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m10800_s04 (__glo w3_t[2] = 0; w3_t[3] = pw_len * 8; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -511,12 +487,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m10800_a3.cl b/OpenCL/m10800_a3.cl similarity index 59% rename from amd/m10800_a3.cl rename to OpenCL/m10800_a3.cl index 6a3835d..4f3da4f 100644 --- a/amd/m10800_a3.cl +++ b/OpenCL/m10800_a3.cl @@ -4,46 +4,21 @@ */ #define _SHA384_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 6 #define DGST_R1 7 #define DGST_R2 4 #define DGST_R3 5 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u64 k_sha384[80] = { @@ -69,33 +44,33 @@ __constant u64 k_sha384[80] = SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, 
}; -static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) +static void sha384_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u64 digest[8]) { - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; + u64 w0_t = hl32_to_64 (w0[0], w0[1]); + u64 w1_t = hl32_to_64 (w0[2], w0[3]); + u64 w2_t = hl32_to_64 (w1[0], w1[1]); + u64 w3_t = hl32_to_64 (w1[2], w1[3]); + u64 w4_t = hl32_to_64 (w2[0], w2[1]); + u64 w5_t = hl32_to_64 (w2[2], w2[3]); + u64 w6_t = hl32_to_64 (w3[0], w3[1]); + u64 w7_t = 0; + u64 w8_t = 0; + u64 w9_t = 0; + u64 wa_t = 0; + u64 wb_t = 0; + u64 wc_t = 0; + u64 wd_t = 0; + u64 we_t = 0; + u64 wf_t = hl32_to_64 (w3[2], w3[3]); + + u64 a = digest[0]; + u64 b = digest[1]; + u64 c = digest[2]; + u64 d = digest[3]; + u64 e = digest[4]; + u64 f = digest[5]; + u64 g = digest[6]; + u64 h = digest[7]; #define ROUND_EXPAND() \ { \ @@ -165,7 +140,7 @@ static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] = 0; } -static void m10800m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10800m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -178,20 +153,18 @@ static void m10800m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 
1]; @@ -210,7 +183,7 @@ static void m10800m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -223,16 +196,16 @@ static void m10800m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m10800s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m10800s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -257,20 +230,18 @@ static void m10800s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = w0; w0_t[1] = w[ 1]; @@ -289,7 +260,7 @@ static void m10800s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w3_t[2] = w[14]; w3_t[3] = w[15]; - u64x digest[8]; + u64 digest[8]; digest[0] = SHA384M_A; digest[1] = SHA384M_B; @@ -302,16 +273,16 @@ static void m10800s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); + const u32 r0 = l32_from_64 (digest[3]); + const u32 r1 = h32_from_64 (digest[3]); + const u32 r2 = l32_from_64 (digest[2]); + const u32 r3 = h32_from_64 (digest[2]); - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -349,7 +320,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m04 (__glo m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -387,7 +358,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m08 (__glo m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global 
u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -425,7 +396,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_m16 (__glo m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
+__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -463,7 +434,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s04 (__glo m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t 
*digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -501,7 +472,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s08 (__glo m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global 
comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10800_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m10900.cl b/OpenCL/m10900.cl similarity index 91% rename from amd/m10900.cl rename to OpenCL/m10900.cl index 066fa0d..de3b193 100644 --- a/amd/m10900.cl +++ b/OpenCL/m10900.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include 
"include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -27,11 +15,11 @@ #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u32 k_sha256[64] = @@ -54,33 +42,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -140,7 +128,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; 
} -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -199,7 +187,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -251,28 +239,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10900_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = swap_workaround (pws[gid].i[ 0]); w0[1] = swap_workaround (pws[gid].i[ 1]); w0[2] = swap_workaround (pws[gid].i[ 2]); w0[3] = swap_workaround (pws[gid].i[ 3]); - u32x w1[4]; + u32 w1[4]; w1[0] = swap_workaround (pws[gid].i[ 4]); w1[1] = swap_workaround (pws[gid].i[ 5]); w1[2] = swap_workaround (pws[gid].i[ 6]); w1[3] = swap_workaround (pws[gid].i[ 7]); - u32x w2[4]; + u32 w2[4]; w2[0] = swap_workaround (pws[gid].i[ 8]); w2[1] = swap_workaround (pws[gid].i[ 9]); w2[2] = swap_workaround (pws[gid].i[10]); w2[3] = swap_workaround (pws[gid].i[11]); - u32x w3[4]; + u32 w3[4]; w3[0] = swap_workaround (pws[gid].i[12]); w3[1] = swap_workaround (pws[gid].i[13]); @@ -476,12 +464,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m10900_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define 
il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m11000_a0.cl b/OpenCL/m11000_a0.cl similarity index 94% rename from amd/m11000_a0.cl rename to OpenCL/m11000_a0.cl index 785e07a..3e1cce5 100644 --- a/amd/m11000_a0.cl +++ b/OpenCL/m11000_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -117,10 +93,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -151,10 +127,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -182,10 +158,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -260,10 +236,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -357,12 +333,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - 
const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -390,14 +366,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -457,10 +433,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; @@ -491,10 +467,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -522,10 +498,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -600,10 +576,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -697,12 +673,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 
= b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11000_a1.cl b/OpenCL/m11000_a1.cl similarity index 95% rename from amd/m11000_a1.cl rename to OpenCL/m11000_a1.cl index 1b9ee78..d391bd0 100644 --- a/amd/m11000_a1.cl +++ b/OpenCL/m11000_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -164,10 +140,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -196,10 +172,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -227,10 +203,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -305,10 +281,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = 
d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -402,12 +378,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -435,28 +411,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -551,10 +527,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; @@ -583,10 +559,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -614,10 +590,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP 
(MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -692,10 +668,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -789,12 +765,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11000_a3.cl b/OpenCL/m11000_a3.cl similarity index 90% rename from amd/m11000_a3.cl rename to OpenCL/m11000_a3.cl index 5407cd2..a057ee1 100644 --- a/amd/m11000_a3.cl +++ b/OpenCL/m11000_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11000m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -91,7 +67,7 @@ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -105,10 +81,10 @@ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 
w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -136,10 +112,10 @@ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -214,10 +190,10 @@ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -311,16 +287,16 @@ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11000s (u32 
w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -379,7 +355,7 @@ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -393,10 +369,10 @@ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // first step fixed 56 bytes of salt - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; + u32 w0_t[4]; + u32 w1_t[4]; + u32 w2_t[4]; + u32 w3_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; @@ -424,10 +400,10 @@ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p // first transform - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -502,10 +478,10 @@ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; 
+ u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -599,12 +575,12 @@ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -618,28 +594,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m04 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -665,28 +641,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -712,28 +688,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_m16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -759,28 +735,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s04 
(__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -806,28 +782,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s08 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -853,28 +829,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11000_s16 (__glo if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m11100_a0.cl b/OpenCL/m11100_a0.cl similarity index 95% rename from amd/m11100_a0.cl rename to OpenCL/m11100_a0.cl index 48a1145..0b6e8cf 100644 --- a/amd/m11100_a0.cl +++ b/OpenCL/m11100_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include 
"common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -157,28 +133,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -187,28 +163,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 
w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -247,10 +223,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -431,12 +407,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -462,14 +438,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -559,28 +535,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -589,28 +565,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x 
w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -649,10 +625,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -833,12 +809,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11100_a1.cl b/OpenCL/m11100_a1.cl similarity index 96% rename from amd/m11100_a1.cl rename to OpenCL/m11100_a1.cl index 9af3b42..2b7d89b 100644 --- a/amd/m11100_a1.cl +++ b/OpenCL/m11100_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define 
VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -211,28 +187,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -271,10 +247,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -455,12 +431,12 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m11100_m04 (__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -486,28 +462,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -639,28 +615,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = salt_buf0[0]; w0_t[1] = salt_buf0[1]; w0_t[2] = salt_buf0[2]; w0_t[3] = salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = salt_buf1[0]; w1_t[1] = salt_buf1[1]; w1_t[2] = salt_buf1[2]; w1_t[3] = salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = 0; w2_t[1] = 0; w2_t[2] = 0; w2_t[3] = 0; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = 0; w3_t[1] = 0; @@ -699,10 +675,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -883,12 +859,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 
(__glo MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11100_a3.cl b/OpenCL/m11100_a3.cl similarity index 92% rename from amd/m11100_a3.cl rename to OpenCL/m11100_a3.cl index bc5d217..dfaa843 100644 --- a/amd/m11100_a3.cl +++ b/OpenCL/m11100_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -115,7 +91,7 @@ static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -123,28 +99,28 @@ static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0] | salt_buf0[0]; w0_t[1] = w0[1] | salt_buf0[1]; w0_t[2] = w0[2] | salt_buf0[2]; w0_t[3] = w0[3] | salt_buf0[3]; - u32x 
w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0] | salt_buf1[0]; w1_t[1] = w1[1] | salt_buf1[1]; w1_t[2] = w1[2] | salt_buf1[2]; w1_t[3] = w1[3] | salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0] | salt_buf2[0]; w2_t[1] = w2[1] | salt_buf2[1]; w2_t[2] = w2[2] | salt_buf2[2]; w2_t[3] = w2[3] | salt_buf2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0] | salt_buf3[0]; w3_t[1] = w3[1] | salt_buf3[1]; @@ -155,10 +131,10 @@ static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -339,16 +315,16 @@ static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -419,7 +395,7 @@ static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -427,28 +403,28 @@ static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p w0[0] = w0l | w0r; - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = w0[0] | salt_buf0[0]; w0_t[1] = w0[1] | salt_buf0[1]; w0_t[2] = w0[2] | salt_buf0[2]; w0_t[3] = w0[3] | salt_buf0[3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = w1[0] | salt_buf1[0]; w1_t[1] = w1[1] | salt_buf1[1]; w1_t[2] = w1[2] | salt_buf1[2]; w1_t[3] = w1[3] | salt_buf1[3]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = w2[0] | salt_buf2[0]; w2_t[1] = w2[1] | salt_buf2[1]; w2_t[2] = w2[2] | salt_buf2[2]; w2_t[3] = w2[3] | salt_buf2[3]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = w3[0] | salt_buf3[0]; w3_t[1] = w3[1] | salt_buf3[1]; @@ -459,10 +435,10 @@ 
static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * md5 ($pass.$salt) */ - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -643,12 +619,12 @@ static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -666,28 +642,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -752,28 +728,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -838,28 +814,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = 
pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -924,28 +900,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1010,28 +986,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1096,28 +1072,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11100_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m11200_a0.cl b/OpenCL/m11200_a0.cl similarity index 94% rename from amd/m11200_a0.cl rename to 
OpenCL/m11200_a0.cl index 29bcdc9..fc5f2ef 100644 --- a/amd/m11200_a0.cl +++ b/OpenCL/m11200_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ 
-62,14 +38,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -96,28 +72,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -132,28 +108,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo * sha1 ($pass) */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = 
swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -251,11 +227,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -522,12 +498,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -555,14 +531,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -601,28 +577,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; 
w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -637,28 +613,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo * sha1 ($pass) */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -756,11 +732,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t 
^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -1027,12 +1003,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11200_a1.cl b/OpenCL/m11200_a1.cl similarity index 95% rename from amd/m11200_a1.cl rename to OpenCL/m11200_a1.cl index 1afa39b..24dadd3 100644 --- a/amd/m11200_a1.cl +++ b/OpenCL/m11200_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__global pw_t 
*pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { @@ -60,28 +36,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -154,28 +130,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = 
wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -186,28 +162,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo * sha1 ($pass) */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -305,11 +281,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + 
SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -576,12 +552,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -609,28 +585,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -715,28 +691,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -747,28 +723,28 @@ __kernel 
void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo * sha1 ($pass) */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -866,11 +842,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + 
SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -1137,12 +1113,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11200_a3.cl b/OpenCL/m11200_a3.cl similarity index 93% rename from amd/m11200_a3.cl rename to OpenCL/m11200_a3.cl index c1a093d..3670a92 100644 --- a/amd/m11200_a3.cl +++ b/OpenCL/m11200_a3.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 3 #define DGST_R1 4 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" -static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11200m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -69,7 +45,7 @@ static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -81,28 +57,28 @@ static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 ($pass) */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - 
u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -200,11 +176,11 @@ static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -471,16 +447,16 @@ static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11200s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -517,7 +493,7 @@ static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -529,28 +505,28 @@ static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 ($pass) */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x 
wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; #undef K #define K SHA1C00 @@ -648,11 +624,11 @@ static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; + u32 plain_sha1_a = a + SHA1M_A; + u32 plain_sha1_b = b + SHA1M_B; + u32 plain_sha1_c = c + SHA1M_C; + u32 plain_sha1_d = d + SHA1M_D; + u32 plain_sha1_e = e + SHA1M_E; /** * sha1 (sha1 ($pass)) @@ -919,12 +895,12 @@ static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p d ^= plain_sha1_d; e ^= plain_sha1_e; - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = d; + const u32 r1 = e; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -942,28 +918,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m04 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x 
w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -993,28 +969,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m08 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1044,28 +1020,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_m16 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1095,28 +1071,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s04 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1146,28 +1122,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s08 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; 
w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1197,28 +1173,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11200_s16 (__glo * modifier */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m11300.cl b/OpenCL/m11300.cl similarity index 98% rename from amd/m11300.cl rename to OpenCL/m11300.cl index 99ec1ef..a786027 100644 --- a/amd/m11300.cl +++ b/OpenCL/m11300.cl @@ -8,26 +8,14 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" __constant u32 te0[256] = { @@ -926,33 +914,33 @@ __constant u64 k_sha512[80] = SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, }; -static void sha512_transform (const u64x w[16], u64x dgst[8]) +static void sha512_transform (const u64 w[16], u64 dgst[8]) { - u64x a = dgst[0]; - u64x b = dgst[1]; - u64x c = dgst[2]; - u64x d = dgst[3]; - u64x e = dgst[4]; - u64x f = dgst[5]; - u64x g = dgst[6]; - u64x h = dgst[7]; - - volatile u64x w0_t = w[ 0]; - volatile u64x w1_t = w[ 1]; - volatile u64x w2_t = w[ 2]; - volatile u64x w3_t = w[ 3]; - volatile u64x w4_t = w[ 4]; - volatile u64x w5_t = w[ 5]; - volatile u64x w6_t = w[ 6]; - volatile u64x w7_t = w[ 7]; - volatile u64x w8_t = w[ 8]; - volatile u64x w9_t = w[ 9]; - volatile u64x wa_t = 
w[10]; - volatile u64x wb_t = w[11]; - volatile u64x wc_t = w[12]; - volatile u64x wd_t = w[13]; - volatile u64x we_t = w[14]; - volatile u64x wf_t = w[15]; + u64 a = dgst[0]; + u64 b = dgst[1]; + u64 c = dgst[2]; + u64 d = dgst[3]; + u64 e = dgst[4]; + u64 f = dgst[5]; + u64 g = dgst[6]; + u64 h = dgst[7]; + + volatile u64 w0_t = w[ 0]; + volatile u64 w1_t = w[ 1]; + volatile u64 w2_t = w[ 2]; + volatile u64 w3_t = w[ 3]; + volatile u64 w4_t = w[ 4]; + volatile u64 w5_t = w[ 5]; + volatile u64 w6_t = w[ 6]; + volatile u64 w7_t = w[ 7]; + volatile u64 w8_t = w[ 8]; + volatile u64 w9_t = w[ 9]; + volatile u64 wa_t = w[10]; + volatile u64 wb_t = w[11]; + volatile u64 wc_t = w[12]; + volatile u64 wd_t = w[13]; + volatile u64 we_t = w[14]; + volatile u64 wf_t = w[15]; #define ROUND_EXPAND() \ { \ @@ -1022,28 +1010,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1131,7 +1119,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u64x w[16]; + u64 w[16]; w[ 0] = hl32_to_64 (w0[0], w0[1]); w[ 1] = hl32_to_64 (w0[2], w0[3]); @@ -1150,7 +1138,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_init (__gl w[14] = 0; w[15] = block_len * 8; - u64x dgst[8]; + u64 dgst[8]; dgst[0] = SHA512M_A; dgst[1] = SHA512M_B; @@ -1179,7 +1167,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_loop (__gl if (gid >= gid_max) return; - u64x dgst[8]; + u64 dgst[8]; dgst[0] = tmps[gid].dgst[0]; 
dgst[1] = tmps[gid].dgst[1]; @@ -1190,7 +1178,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_loop (__gl dgst[6] = tmps[gid].dgst[6]; dgst[7] = tmps[gid].dgst[7]; - u64x w[16]; + u64 w[16]; w[ 0] = dgst[0]; w[ 1] = dgst[1]; @@ -1338,7 +1326,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_comp (__gl dgst[6] = tmps[gid].dgst[6]; dgst[7] = tmps[gid].dgst[7]; - u32x key[8]; + u32 key[8]; key[0] = h32_from_64 (dgst[0]); key[1] = l32_from_64 (dgst[0]); @@ -1349,7 +1337,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_comp (__gl key[6] = h32_from_64 (dgst[3]); key[7] = l32_from_64 (dgst[3]); - u32x iv[4]; + u32 iv[4]; iv[0] = h32_from_64 (dgst[4]); iv[1] = l32_from_64 (dgst[4]); @@ -1364,11 +1352,11 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_comp (__gl AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - u32x out[4]; + u32 out[4]; for (u32 i = 0; i < esalt_bufs[salt_pos].cry_master_len; i += 16) { - u32x data[4]; + u32 data[4]; data[0] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 0]); data[1] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 1]); @@ -1393,7 +1381,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11300_comp (__gl && (out[2] == 0x10101010) && (out[3] == 0x10101010)) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); + mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0); d_return_buf[lid] = 1; } diff --git a/amd/m11400_a0.cl b/OpenCL/m11400_a0.cl similarity index 97% rename from amd/m11400_a0.cl rename to OpenCL/m11400_a0.cl index 08b28ae..7f55294 100644 --- a/amd/m11400_a0.cl +++ b/OpenCL/m11400_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define 
DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -58,42 +34,42 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) +static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; const int offset_minus_4 = 4 - mod; - u32x append0_t[4]; + u32 append0_t[4]; append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4); append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4); append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4); append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4); - u32x append1_t[4]; + u32 append1_t[4]; append1_t[0] = amd_bytealign (append1[0], append0[3], offset_minus_4); append1_t[1] = amd_bytealign (append1[1], append1[0], offset_minus_4); append1_t[2] = amd_bytealign (append1[2], append1[1], offset_minus_4); 
append1_t[3] = amd_bytealign (append1[3], append1[2], offset_minus_4); - u32x append2_t[4]; + u32 append2_t[4]; append2_t[0] = amd_bytealign (append2[0], append1[3], offset_minus_4); append2_t[1] = amd_bytealign (append2[1], append2[0], offset_minus_4); append2_t[2] = amd_bytealign (append2[2], append2[1], offset_minus_4); append2_t[3] = amd_bytealign (append2[3], append2[2], offset_minus_4); - u32x append3_t[4]; + u32 append3_t[4]; append3_t[0] = amd_bytealign (append3[0], append2[3], offset_minus_4); append3_t[1] = amd_bytealign (append3[1], append3[0], offset_minus_4); append3_t[2] = amd_bytealign (append3[2], append3[1], offset_minus_4); append3_t[3] = amd_bytealign (append3[3], append3[2], offset_minus_4); - u32x append4_t[4]; + u32 append4_t[4]; append4_t[0] = amd_bytealign ( 0, append3[3], offset_minus_4); append4_t[1] = 0; @@ -749,14 +725,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -916,28 +892,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -956,7 +932,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -975,7 +951,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -998,28 +974,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -1033,12 +1009,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1115,10 +1091,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo if (block_len > 55) { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -1327,10 +1303,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -1541,12 +1517,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + 
const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -1572,14 +1548,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -1751,28 +1727,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1791,7 +1767,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1810,7 +1786,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -1833,28 +1809,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = 
block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -1868,12 +1844,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1950,10 +1926,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo if (block_len > 55) { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -2162,10 +2138,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -2376,12 +2352,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11400_a1.cl b/OpenCL/m11400_a1.cl similarity index 97% rename from amd/m11400_a1.cl rename to OpenCL/m11400_a1.cl index 22c6c4c..05a15ce 100644 --- a/amd/m11400_a1.cl +++ b/OpenCL/m11400_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" 
+#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -56,42 +32,42 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) +static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; const int offset_minus_4 = 4 - mod; - u32x append0_t[4]; + u32 append0_t[4]; append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4); append0_t[1] = amd_bytealign (append0[1], append0[0], offset_minus_4); append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4); append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4); - u32x append1_t[4]; + u32 append1_t[4]; append1_t[0] = amd_bytealign (append1[0], append0[3], offset_minus_4); append1_t[1] = amd_bytealign (append1[1], append1[0], offset_minus_4); append1_t[2] = amd_bytealign (append1[2], append1[1], offset_minus_4); append1_t[3] = amd_bytealign (append1[3], append1[2], offset_minus_4); - u32x append2_t[4]; + u32 append2_t[4]; append2_t[0] = amd_bytealign (append2[0], append1[3], offset_minus_4); append2_t[1] = amd_bytealign 
(append2[1], append2[0], offset_minus_4); append2_t[2] = amd_bytealign (append2[2], append2[1], offset_minus_4); append2_t[3] = amd_bytealign (append2[3], append2[2], offset_minus_4); - u32x append3_t[4]; + u32 append3_t[4]; append3_t[0] = amd_bytealign (append3[0], append2[3], offset_minus_4); append3_t[1] = amd_bytealign (append3[1], append3[0], offset_minus_4); append3_t[2] = amd_bytealign (append3[2], append3[1], offset_minus_4); append3_t[3] = amd_bytealign (append3[3], append3[2], offset_minus_4); - u32x append4_t[4]; + u32 append4_t[4]; append4_t[0] = amd_bytealign ( 0, append3[3], offset_minus_4); append4_t[1] = 0; @@ -747,28 +723,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -972,28 +948,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -1008,7 +984,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1027,7 +1003,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -1050,28 +1026,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -1085,12 +1061,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1167,10 +1143,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo if (block_len > 55) { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -1379,10 +1355,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; 
// 2nd transform @@ -1593,12 +1569,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -1624,28 +1600,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -1861,28 +1837,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -1897,7 +1873,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1916,7 +1892,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo 
block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -1939,28 +1915,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -1974,12 +1950,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -2056,10 +2032,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo if (block_len > 55) { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -2268,10 +2244,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -2482,12 +2458,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const 
u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11400_a3.cl b/OpenCL/m11400_a3.cl similarity index 95% rename from amd/m11400_a3.cl rename to OpenCL/m11400_a3.cl index 084ed1b..6043c1d 100644 --- a/amd/m11400_a3.cl +++ b/OpenCL/m11400_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE2 -#endif - -#ifdef VLIW5 -#define VECT_SIZE2 -#endif - #define DGST_R0 0 #define DGST_R1 3 #define DGST_R2 2 #define DGST_R3 1 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_lower8(i) l_bin2asc[(i)] @@ -60,42 +36,42 @@ #define uint_to_hex_lower8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) +static u32 memcat32 (u32 block0[16], u32 block1[16], const u32 block_len, const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 append3[4], const u32 append_len) { const u32 mod = block_len & 3; const u32 div = block_len / 4; const int offset_minus_4 = 4 - mod; - u32x append0_t[4]; + u32 append0_t[4]; append0_t[0] = amd_bytealign (append0[0], 0, offset_minus_4); append0_t[1] = amd_bytealign 
(append0[1], append0[0], offset_minus_4); append0_t[2] = amd_bytealign (append0[2], append0[1], offset_minus_4); append0_t[3] = amd_bytealign (append0[3], append0[2], offset_minus_4); - u32x append1_t[4]; + u32 append1_t[4]; append1_t[0] = amd_bytealign (append1[0], append0[3], offset_minus_4); append1_t[1] = amd_bytealign (append1[1], append1[0], offset_minus_4); append1_t[2] = amd_bytealign (append1[2], append1[1], offset_minus_4); append1_t[3] = amd_bytealign (append1[3], append1[2], offset_minus_4); - u32x append2_t[4]; + u32 append2_t[4]; append2_t[0] = amd_bytealign (append2[0], append1[3], offset_minus_4); append2_t[1] = amd_bytealign (append2[1], append2[0], offset_minus_4); append2_t[2] = amd_bytealign (append2[2], append2[1], offset_minus_4); append2_t[3] = amd_bytealign (append2[3], append2[2], offset_minus_4); - u32x append3_t[4]; + u32 append3_t[4]; append3_t[0] = amd_bytealign (append3[0], append2[3], offset_minus_4); append3_t[1] = amd_bytealign (append3[1], append3[0], offset_minus_4); append3_t[2] = amd_bytealign (append3[2], append3[1], offset_minus_4); append3_t[3] = amd_bytealign (append3[3], append3[2], offset_minus_4); - u32x append4_t[4]; + u32 append4_t[4]; append4_t[0] = amd_bytealign ( 0, append3[3], offset_minus_4); append4_t[1] = 0; @@ -737,7 +713,7 @@ static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, cons return new_len; } -static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global 
salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400m_0_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -854,7 +830,7 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -868,7 +844,7 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -887,7 +863,7 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -908,28 +884,28 @@ static void m11400m_0_0 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -938,12 +914,12 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1131,10 +1107,10 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -1231,16 +1207,16 @@ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global 
u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400m_0_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -1376,7 +1352,7 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -1390,7 +1366,7 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -1409,7 +1385,7 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] 
= salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -1430,28 +1406,28 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -1460,12 +1436,12 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -1653,10 +1629,10 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -1853,16 +1829,16 @@ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global 
u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400m_1_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -1979,7 +1955,7 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -1993,7 +1969,7 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] 
= salt_buf0[ 1]; @@ -2012,7 +1988,7 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -2033,28 +2009,28 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -2063,12 +2039,12 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -2143,10 +2119,10 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -2454,16 +2430,16 @@ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, 
__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400m_1_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -2599,7 +2575,7 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -2613,7 +2589,7 @@ 
static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -2632,7 +2608,7 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -2653,28 +2629,28 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -2683,12 +2659,12 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -2763,10 +2739,10 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -3174,16 +3150,16 @@ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; 
+ const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400s_0_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -3300,7 +3276,7 @@ static void 
m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -3314,7 +3290,7 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -3333,7 +3309,7 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -3354,28 +3330,28 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -3384,12 +3360,12 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -3577,10 +3553,10 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -3677,16 +3653,16 @@ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], 
u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400s_0_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -3822,7 +3798,7 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -3836,7 +3812,7 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -3855,7 +3831,7 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -3876,28 +3852,28 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -3906,12 +3882,12 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -4099,10 +4075,10 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + 
u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; // 2nd transform @@ -4299,16 +4275,16 @@ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400s_1_0 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 
*d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -4425,7 +4401,7 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -4439,7 +4415,7 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -4458,7 +4434,7 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -4479,28 +4455,28 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -4509,12 +4485,12 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b = MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -4589,10 +4565,10 @@ static 
void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -4900,16 +4876,16 @@ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } -static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m11400s_1_1 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, 
__global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global sip_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -5045,7 +5021,7 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -5059,7 +5035,7 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // append the pass to the salt - u32x block0[16]; + u32 block0[16]; block0[ 0] = salt_buf0[ 0]; block0[ 1] = salt_buf0[ 1]; @@ -5078,7 +5054,7 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u block0[14] = salt_buf0[14]; block0[15] = salt_buf0[15]; - u32x block1[16]; + u32 block1[16]; block1[ 0] = salt_buf1[ 0]; block1[ 1] = salt_buf1[ 1]; @@ -5099,28 +5075,28 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - u32x w0_t[4]; + u32 w0_t[4]; w0_t[0] = block0[ 0]; w0_t[1] = block0[ 1]; w0_t[2] = block0[ 2]; w0_t[3] = block0[ 3]; - u32x w1_t[4]; + u32 w1_t[4]; w1_t[0] = block0[ 4]; w1_t[1] = block0[ 5]; w1_t[2] = block0[ 6]; w1_t[3] = block0[ 7]; - u32x w2_t[4]; + u32 w2_t[4]; w2_t[0] = block0[ 8]; w2_t[1] = block0[ 9]; w2_t[2] = block0[10]; w2_t[3] = block0[11]; - u32x w3_t[4]; + u32 w3_t[4]; w3_t[0] = block0[12]; w3_t[1] = block0[13]; @@ -5129,12 +5105,12 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u // md5 - u32x tmp2; + u32 tmp2; - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; + u32 a = MD5M_A; + u32 b 
= MD5M_B; + u32 c = MD5M_C; + u32 d = MD5M_D; MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); @@ -5209,10 +5185,10 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += MD5M_C; d += MD5M_D; - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; + u32 r_a = a; + u32 r_b = b; + u32 r_c = c; + u32 r_d = d; w0_t[0] = block1[ 0]; w0_t[1] = block1[ 1]; @@ -5620,12 +5596,12 @@ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u c += r_c; d += r_d; - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; + const u32 r0 = a; + const u32 r1 = d; + const u32 r2 = c; + const u32 r3 = b; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -5644,28 +5620,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -5750,28 +5726,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -5856,28 +5832,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] 
= pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -5962,28 +5938,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -6068,28 +6044,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -6174,28 +6150,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11400_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m11500_a0.cl b/OpenCL/m11500_a0.cl similarity index 90% rename from amd/m11500_a0.cl rename to 
OpenCL/m11500_a0.cl index 559d4b8..23550ec 100644 --- a/amd/m11500_a0.cl +++ b/OpenCL/m11500_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 crc32tab[0x100] = { @@ -114,11 +90,11 @@ __constant u32 crc32tab[0x100] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -static u32x round_crc32 (u32x a, const u32x v) +static u32 round_crc32 (u32 a, const u32 v) { - const u32x k = (a ^ v) & 0xff; + const u32 k = (a ^ v) & 0xff; - const u32x s = a >> 8; + const u32 s = a >> 8; #ifdef VECT_SIZE1 a = crc32tab[k]; @@ -141,9 +117,9 @@ static u32x round_crc32 (u32x a, const u32x v) return a; } -static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) +static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv) { - u32x a = iv ^ ~0; + u32 a = iv ^ ~0; if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); @@ -183,14 +159,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 
0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -205,28 +181,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -235,7 +211,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0[0]; w_t[ 1] = w0[1]; @@ -254,15 +230,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo w_t[14] = 0; w_t[15] = 0; - u32x a = crc32 (w_t, out_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, out_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -290,14 +266,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -326,28 +302,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = 
pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -356,7 +332,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo const u32 out_len = apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len); - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0[0]; w_t[ 1] = w0[1]; @@ -375,15 +351,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo w_t[14] = 0; w_t[15] = 0; - u32x a = crc32 (w_t, out_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, out_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11500_a1.cl b/OpenCL/m11500_a1.cl similarity index 91% rename from amd/m11500_a1.cl rename to OpenCL/m11500_a1.cl index 8f00e6c..b297b98 100644 --- a/amd/m11500_a1.cl +++ b/OpenCL/m11500_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" 
+#define COMPARE_M "check_multi_comp4.c" __constant u32 crc32tab[0x100] = { @@ -112,11 +88,11 @@ __constant u32 crc32tab[0x100] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -static u32x round_crc32 (u32x a, const u32x v) +static u32 round_crc32 (u32 a, const u32 v) { - const u32x k = (a ^ v) & 0xff; + const u32 k = (a ^ v) & 0xff; - const u32x s = a >> 8; + const u32 s = a >> 8; #ifdef VECT_SIZE1 a = crc32tab[k]; @@ -139,9 +115,9 @@ static u32x round_crc32 (u32x a, const u32x v) return a; } -static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) +static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv) { - u32x a = iv ^ ~0; + u32 a = iv ^ ~0; if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); @@ -181,28 +157,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -259,7 +235,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = wordl0[0] | wordr0[0]; w_t[ 1] = wordl0[1] | wordr0[1]; @@ -278,15 +254,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo w_t[14] = wordl3[2] | wordr3[2]; w_t[15] = 0; - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, pw_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 
= b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -314,28 +290,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -406,7 +382,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = wordl0[0] | wordr0[0]; w_t[ 1] = wordl0[1] | wordr0[1]; @@ -425,15 +401,15 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__glo w_t[14] = wordl3[2] | wordr3[2]; w_t[15] = 0; - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, pw_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11500_a3.cl b/OpenCL/m11500_a3.cl similarity index 60% rename from amd/m11500_a3.cl rename to OpenCL/m11500_a3.cl index 9575ce0..b29b184 100644 --- a/amd/m11500_a3.cl +++ b/OpenCL/m11500_a3.cl @@ -4,46 +4,21 @@ */ #define _CRC32_ -#define _SCALAR_ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include 
"types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" __constant u32 crc32tab[0x100] = { @@ -113,11 +88,11 @@ __constant u32 crc32tab[0x100] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -static u32x round_crc32 (u32x a, const u32x v) +static u32 round_crc32 (u32 a, const u32 v) { - const u32x k = (a ^ v) & 0xff; + const u32 k = (a ^ v) & 0xff; - const u32x s = a >> 8; + const u32 s = a >> 8; #ifdef VECT_SIZE1 a = crc32tab[k]; @@ -140,9 +115,9 @@ static u32x round_crc32 (u32x a, const u32x v) return a; } -static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) +static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv) { - u32x a = iv ^ ~0; + u32 a = iv ^ ~0; if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); @@ -160,7 +135,7 @@ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) return ~a; } -static void m11500m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global 
salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11500m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -179,17 +154,15 @@ static void m11500m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop */ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0; w_t[ 1] = w[ 1]; @@ -208,19 +181,19 @@ static void m11500m (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w_t[14] = w[14]; w_t[15] = w[15]; - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, pw_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - 
const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11500s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11500s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -247,17 +220,15 @@ static void m11500s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g * loop 
*/ - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; + u32 w0l = w[0]; - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) + for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { - const u32x w0r = words_buf_r[il_pos]; + const u32 w0r = words_buf_r[il_pos]; - const u32x w0 = w0l | w0r; + const u32 w0 = w0l | w0r; - u32x w_t[16]; + u32 w_t[16]; w_t[ 0] = w0; w_t[ 1] = w[ 1]; @@ -276,19 +247,19 @@ static void m11500s (u32 w[16], const u32 pw_len, __global pw_t *pws, __global g w_t[14] = w[14]; w_t[15] = w[15]; - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; + u32 a = crc32 (w_t, pw_len, iv); + u32 b = 0; - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = a; + const u32 r1 = b; + const u32 r2 = 0; + const u32 r3 = 0; - #include VECT_COMPARE_S + #include COMPARE_S } } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -326,7 +297,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m04 (__glo m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -364,7 +335,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m08 (__glo m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -402,7 +373,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_m16 (__glo m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s04 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -440,7 +411,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m11500_s04 (__glo m11500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s08 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void 
*esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base @@ -478,7 +449,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s08 (__glo m11500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32x * words_buf_r, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11500_s16 (__global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global u32 * words_buf_r, __global void *tmps, __global void 
*hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) { /** * base diff --git a/amd/m11600.cl b/OpenCL/m11600.cl similarity index 97% rename from amd/m11600.cl rename to OpenCL/m11600.cl index 21dd5e5..95d1022 100644 --- a/amd/m11600.cl +++ b/OpenCL/m11600.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u32 te0[256] = @@ -926,33 +914,33 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void sha256_transform (const u32x w[16], u32x digest[8]) +static void sha256_transform (const u32 w[16], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w[ 0]); - u32x w1_t = swap_workaround (w[ 1]); - u32x w2_t = swap_workaround 
(w[ 2]); - u32x w3_t = swap_workaround (w[ 3]); - u32x w4_t = swap_workaround (w[ 4]); - u32x w5_t = swap_workaround (w[ 5]); - u32x w6_t = swap_workaround (w[ 6]); - u32x w7_t = swap_workaround (w[ 7]); - u32x w8_t = swap_workaround (w[ 8]); - u32x w9_t = swap_workaround (w[ 9]); - u32x wa_t = swap_workaround (w[10]); - u32x wb_t = swap_workaround (w[11]); - u32x wc_t = swap_workaround (w[12]); - u32x wd_t = swap_workaround (w[13]); - u32x we_t = swap_workaround (w[14]); - u32x wf_t = swap_workaround (w[15]); + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = swap_workaround (w[ 0]); + u32 w1_t = swap_workaround (w[ 1]); + u32 w2_t = swap_workaround (w[ 2]); + u32 w3_t = swap_workaround (w[ 3]); + u32 w4_t = swap_workaround (w[ 4]); + u32 w5_t = swap_workaround (w[ 5]); + u32 w6_t = swap_workaround (w[ 6]); + u32 w7_t = swap_workaround (w[ 7]); + u32 w8_t = swap_workaround (w[ 8]); + u32 w9_t = swap_workaround (w[ 9]); + u32 wa_t = swap_workaround (w[10]); + u32 wb_t = swap_workaround (w[11]); + u32 wc_t = swap_workaround (w[12]); + u32 wd_t = swap_workaround (w[13]); + u32 we_t = swap_workaround (w[14]); + u32 wf_t = swap_workaround (w[15]); #define ROUND_EXPAND() \ { \ @@ -1080,11 +1068,11 @@ __constant u32 crc32tab[0x100] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -static u32x round_crc32 (u32x a, const u32x v) +static u32 round_crc32 (u32 a, const u32 v) { - const u32x k = (a ^ v) & 0xff; + const u32 k = (a ^ v) & 0xff; - const u32x s = a >> 8; + const u32 s = a >> 8; #ifdef VECT_SIZE1 a = crc32tab[k]; @@ -1107,9 +1095,9 @@ static u32x round_crc32 (u32x a, const u32x v) return a; } -static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) +static u32 crc32 (const u32 w[16], const u32 pw_len, const u32 iv) { - u32x a = iv ^ ~0; + u32 a = iv ^ ~0; if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); if (pw_len >= 2) 
a = round_crc32 (a, w[0] >> 8); @@ -1127,7 +1115,7 @@ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) return ~a; } -static void bzero16 (u32x block[16]) +static void bzero16 (u32 block[16]) { block[ 0] = 0; block[ 1] = 0; @@ -1147,14 +1135,14 @@ static void bzero16 (u32x block[16]) block[15] = 0; } -static u32 memcat8c (u32x block[16], const u32 block_len, const u32x append[2], const u32 append_len, u32x digest[8]) +static u32 memcat8c (u32 block[16], const u32 block_len, const u32 append[2], const u32 append_len, u32 digest[8]) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - u32x tmp2; + u32 tmp0; + u32 tmp1; + u32 tmp2; const int offset_minus_4 = 4 - block_len; @@ -1169,7 +1157,7 @@ static u32 memcat8c (u32x block[16], const u32 block_len, const u32x append[2], tmp2 = 0; } - u32x carry[2] = { 0, 0 }; + u32 carry[2] = { 0, 0 }; switch (div) { @@ -1256,20 +1244,20 @@ static u32 memcat8c (u32x block[16], const u32 block_len, const u32x append[2], return new_len; } -static u32 memcat32c (u32x block[16], const u32 block_len, const u32x append[8], const u32 append_len, u32x digest[8]) +static u32 memcat32c (u32 block[16], const u32 block_len, const u32 append[8], const u32 append_len, u32 digest[8]) { const u32 mod = block_len & 3; const u32 div = block_len / 4; - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - u32x tmp5; - u32x tmp6; - u32x tmp7; - u32x tmp8; + u32 tmp0; + u32 tmp1; + u32 tmp2; + u32 tmp3; + u32 tmp4; + u32 tmp5; + u32 tmp6; + u32 tmp7; + u32 tmp8; const int offset_minus_4 = 4 - block_len; @@ -1296,7 +1284,7 @@ static u32 memcat32c (u32x block[16], const u32 block_len, const u32x append[8], tmp8 = 0; } - u32x carry[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + u32 carry[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; switch (div) { @@ -1499,7 +1487,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_init (__gl * algo starts here already */ - u32x dgst[8]; + u32 dgst[8]; 
dgst[0] = SHA256M_A; dgst[1] = SHA256M_B; @@ -1510,7 +1498,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_init (__gl dgst[6] = SHA256M_G; dgst[7] = SHA256M_H; - u32x block[16]; + u32 block[16]; bzero16 (block); @@ -1561,7 +1549,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_loop (__gl if (gid >= gid_max) return; - u32x pw[8]; + u32 pw[8]; pw[0] = pws[gid].i[ 0]; pw[1] = pws[gid].i[ 1]; @@ -1582,7 +1570,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_loop (__gl * context load */ - u32x dgst[8]; + u32 dgst[8]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -1593,7 +1581,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_loop (__gl dgst[6] = tmps[gid].dgst[6]; dgst[7] = tmps[gid].dgst[7]; - u32x block[16]; + u32 block[16]; block[ 0] = tmps[gid].block[ 0]; block[ 1] = tmps[gid].block[ 1]; @@ -1749,7 +1737,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_comp (__gl * context load */ - u32x dgst[8]; + u32 dgst[8]; dgst[0] = tmps[gid].dgst[0]; dgst[1] = tmps[gid].dgst[1]; @@ -1760,7 +1748,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_comp (__gl dgst[6] = tmps[gid].dgst[6]; dgst[7] = tmps[gid].dgst[7]; - u32x block[16]; + u32 block[16]; block[ 0] = tmps[gid].block[ 0]; block[ 1] = tmps[gid].block[ 1]; @@ -1806,7 +1794,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_comp (__gl iv[2] = esalt_bufs[salt_pos].iv_buf[2]; iv[3] = esalt_bufs[salt_pos].iv_buf[3]; - u32x ukey[8]; + u32 ukey[8]; ukey[0] = dgst[0]; ukey[1] = dgst[1]; @@ -1957,18 +1945,18 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11600_comp (__gl if ((out[0] == 0) && (out[1] == 0) && (out[2] == 0) && (out[3] == 0)) { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); + mark_hash (plains_buf, hashes_shown, digests_offset + 0, gid, 0); d_return_buf[lid] = 1; } } - const u32x r0 
= crc; - const u32x r1 = 0; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = crc; + const u32 r1 = 0; + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m11700_a0.cl b/OpenCL/m11700_a0.cl similarity index 98% rename from amd/m11700_a0.cl rename to OpenCL/m11700_a0.cl index 47f214b..572c7ac 100644 --- a/amd/m11700_a0.cl +++ b/OpenCL/m11700_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0x0101010101010101 @@ -2367,14 +2343,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -2389,7 +2365,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 
0] = pw_buf0[0]; w[ 1] = pw_buf0[1]; @@ -2470,7 +2446,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -2542,14 +2518,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -2576,7 +2552,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 0] = pw_buf0[0]; w[ 1] = pw_buf0[1]; @@ -2657,7 +2633,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11700_a1.cl b/OpenCL/m11700_a1.cl similarity index 98% rename from amd/m11700_a1.cl rename to OpenCL/m11700_a1.cl index bb45751..ce49d21 100644 --- a/amd/m11700_a1.cl +++ b/OpenCL/m11700_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef 
VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0x0101010101010101 @@ -2365,28 +2341,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -2445,7 +2421,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -2524,7 +2500,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -2608,28 +2584,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -2688,7 +2664,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 
(__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -2767,7 +2743,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11700_a3.cl b/OpenCL/m11700_a3.cl similarity index 96% rename from amd/m11700_a3.cl rename to OpenCL/m11700_a3.cl index f65dd45..12187ef 100644 --- a/amd/m11700_a3.cl +++ b/OpenCL/m11700_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0x0101010101010101 @@ -2306,7 +2282,7 @@ static void streebog_g (u64 h[8], const u64 m[8], __local u64 s_sbob_sl64[8][256 } } -static void m11700m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 
*bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11700m (__local u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -2319,7 +2295,7 @@ static void m11700m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l * loop */ - u32x w0l = w[0]; + u32 w0l = w[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -2385,11 +2361,11 @@ static void m11700m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11700s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global 
gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11700s (__local u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -2414,7 +2390,7 @@ static void m11700s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l * loop */ - u32x w0l = w[0]; + u32 w0l = w[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -2480,7 +2456,7 @@ static void m11700s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l const u32 
r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -2493,7 +2469,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2575,7 +2551,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2657,7 +2633,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_m16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2739,7 +2715,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2821,7 +2797,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2903,7 +2879,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11700_s16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; diff --git a/amd/m11800_a0.cl b/OpenCL/m11800_a0.cl similarity index 98% rename from amd/m11800_a0.cl rename to OpenCL/m11800_a0.cl index d0e95b3..676e651 100644 --- a/amd/m11800_a0.cl +++ b/OpenCL/m11800_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define 
VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "rp.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0 @@ -2367,14 +2343,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -2389,7 +2365,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 0] = pw_buf0[0]; w[ 1] = pw_buf0[1]; @@ -2470,7 +2446,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -2542,14 +2518,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo if (gid >= gid_max) return; - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 
5]; @@ -2576,7 +2552,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w[16]; + u32 w[16]; w[ 0] = pw_buf0[0]; w[ 1] = pw_buf0[1]; @@ -2657,7 +2633,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11800_a1.cl b/OpenCL/m11800_a1.cl similarity index 98% rename from amd/m11800_a1.cl rename to OpenCL/m11800_a1.cl index 2b76d2f..943eff5 100644 --- a/amd/m11800_a1.cl +++ b/OpenCL/m11800_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0 @@ -2366,28 +2342,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = 
pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -2446,7 +2422,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -2525,7 +2501,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -2609,28 +2585,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo if (gid >= gid_max) return; - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -2689,7 +2665,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w[16]; + u32 w[16]; w[ 0] = wordl0[0] | wordr0[0]; w[ 1] = wordl0[1] | wordr0[1]; @@ -2768,7 +2744,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m11800_a3.cl b/OpenCL/m11800_a3.cl similarity index 96% rename from amd/m11800_a3.cl rename to OpenCL/m11800_a3.cl index 9928c0b..b1e81d7 100644 --- a/amd/m11800_a3.cl +++ b/OpenCL/m11800_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" 
#include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define INITVAL 0 @@ -2306,7 +2282,7 @@ static void streebog_g (u64 h[8], const u64 m[8], __local u64 s_sbob_sl64[8][256 } } -static void m11800m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11800m (__local u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t 
*rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -2319,7 +2295,7 @@ static void m11800m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l * loop */ - u32x w0l = w[0]; + u32 w0l = w[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -2385,11 +2361,11 @@ static void m11800m (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m11800s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) +static void m11800s (__local u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -2414,7 +2390,7 @@ static void m11800s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l * loop */ - u32x w0l = w[0]; + u32 w0l = w[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -2480,7 +2456,7 @@ static void m11800s (__local u64 s_sbob_sl64[8][256], u32x w[16], const u32 pw_l const u32 r2 = l32_from_64 (h[1]); const u32 r3 = h32_from_64 (h[1]); - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -2493,7 +2469,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2575,7 +2551,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_m08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2657,7 +2633,7 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m11800_m16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2739,7 +2715,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s04 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2821,7 +2797,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s08 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -2903,7 +2879,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11800_s16 (__glo const u32 gid = get_global_id (0); const u32 lid = get_local_id (0); - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; diff --git a/amd/m11900.cl b/OpenCL/m11900.cl similarity index 90% rename from amd/m11900.cl rename to OpenCL/m11900.cl index 4ad4691..b3fede6 100644 --- a/amd/m11900.cl +++ b/OpenCL/m11900.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -27,38 +15,38 @@ #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif -static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - 
u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; + + u32 tmp2; MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); @@ -134,7 +122,7 @@ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) +static void hmac_md5_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -185,7 +173,7 @@ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x i md5_transform (w0, w1, w2, w3, opad); } -static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) +static void hmac_md5_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[4], u32 opad[4], u32 digest[4]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -229,28 +217,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11900_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; 
w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -410,12 +398,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m11900_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12000.cl b/OpenCL/m12000.cl similarity index 93% rename from amd/m12000.cl rename to OpenCL/m12000.cl index 2eddbd1..d85fbf4 100644 --- a/amd/m12000.cl +++ b/OpenCL/m12000.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -27,37 +15,37 @@ #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = 
w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -162,7 +150,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -215,7 +203,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -261,28 +249,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12000_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = swap_workaround (pws[gid].i[ 0]); w0[1] = swap_workaround (pws[gid].i[ 1]); w0[2] = swap_workaround (pws[gid].i[ 2]); w0[3] = swap_workaround (pws[gid].i[ 3]); - u32x w1[4]; + u32 w1[4]; w1[0] = swap_workaround (pws[gid].i[ 4]); w1[1] = swap_workaround (pws[gid].i[ 5]); w1[2] = swap_workaround (pws[gid].i[ 6]); w1[3] = swap_workaround (pws[gid].i[ 7]); - u32x w2[4]; + u32 w2[4]; w2[0] = swap_workaround 
(pws[gid].i[ 8]); w2[1] = swap_workaround (pws[gid].i[ 9]); w2[2] = swap_workaround (pws[gid].i[10]); w2[3] = swap_workaround (pws[gid].i[11]); - u32x w3[4]; + u32 w3[4]; w3[0] = swap_workaround (pws[gid].i[12]); w3[1] = swap_workaround (pws[gid].i[13]); @@ -453,12 +441,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12000_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12200.cl b/OpenCL/m12200.cl similarity index 96% rename from amd/m12200.cl rename to OpenCL/m12200.cl index b7ae9d2..1c35b46 100644 --- a/amd/m12200.cl +++ b/OpenCL/m12200.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u64 k_sha512[80] = @@ -153,28 +141,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12200_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = 
pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -327,14 +315,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12200_comp (__gl const u32 lid = get_local_id (0); - const u64x a = tmps[gid].out[0]; + const u64 a = tmps[gid].out[0]; - const u32x r0 = h32_from_64 (a); - const u32x r1 = l32_from_64 (a); - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = h32_from_64 (a); + const u32 r1 = l32_from_64 (a); + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12300.cl b/OpenCL/m12300.cl similarity index 97% rename from amd/m12300.cl rename to OpenCL/m12300.cl index 65a8177..97e67fe 100644 --- a/amd/m12300.cl +++ b/OpenCL/m12300.cl @@ -8,29 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif __constant u64 k_sha512[80] = @@ -256,28 +244,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12300_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = swap_workaround (pws[gid].i[ 0]); w0[1] = swap_workaround (pws[gid].i[ 1]); w0[2] = swap_workaround (pws[gid].i[ 2]); w0[3] = swap_workaround (pws[gid].i[ 3]); - u32x w1[4]; + u32 w1[4]; w1[0] = swap_workaround (pws[gid].i[ 4]); w1[1] = swap_workaround (pws[gid].i[ 5]); w1[2] = swap_workaround (pws[gid].i[ 6]); w1[3] = swap_workaround (pws[gid].i[ 7]); - u32x w2[4]; + u32 w2[4]; w2[0] = swap_workaround (pws[gid].i[ 8]); w2[1] = 
swap_workaround (pws[gid].i[ 9]); w2[2] = swap_workaround (pws[gid].i[10]); w2[3] = swap_workaround (pws[gid].i[11]); - u32x w3[4]; + u32 w3[4]; w3[0] = swap_workaround (pws[gid].i[12]); w3[1] = swap_workaround (pws[gid].i[13]); @@ -524,12 +512,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12300_comp (__gl sha512_transform (w, dgst); - const u32x r0 = h32_from_64 (dgst[0]); - const u32x r1 = l32_from_64 (dgst[0]); - const u32x r2 = h32_from_64 (dgst[1]); - const u32x r3 = l32_from_64 (dgst[1]); + const u32 r0 = h32_from_64 (dgst[0]); + const u32 r1 = l32_from_64 (dgst[0]); + const u32 r2 = h32_from_64 (dgst[1]); + const u32 r3 = l32_from_64 (dgst[1]); #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12400.cl b/OpenCL/m12400.cl similarity index 94% rename from amd/m12400.cl rename to OpenCL/m12400.cl index 9d6b0fe..79cfaae 100644 --- a/amd/m12400.cl +++ b/OpenCL/m12400.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #define PERM_OP(a,b,tt,n,m) \ { \ @@ -374,9 +350,9 @@ __constant u32 c_skb[8][64] = #define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], 
(S)[(n)][(i).s2], (S)[(n)][(i).s3]) #endif -static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __local u32 s_skb[8][64]) +static void _des_crypt_keysetup (u32 c, u32 d, u32 Kc[16], u32 Kd[16], __local u32 s_skb[8][64]) { - u32x tt; + u32 tt; PERM_OP (d, c, tt, 4, 0x0f0f0f0f); HPERM_OP (c, tt, 2, 0xcccc0000); @@ -404,13 +380,13 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc c = c & 0x0fffffff; d = d & 0x0fffffff; - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; + const u32 c00 = (c >> 0) & 0x0000003f; + const u32 c06 = (c >> 6) & 0x00383003; + const u32 c07 = (c >> 7) & 0x0000003c; + const u32 c13 = (c >> 13) & 0x0000060f; + const u32 c20 = (c >> 20) & 0x00000001; - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) + u32 s = BOX (((c00 >> 0) & 0xff), 0, s_skb) | BOX (((c06 >> 0) & 0xff) |((c07 >> 0) & 0xff), 1, s_skb) | BOX (((c13 >> 0) & 0xff) @@ -419,12 +395,12 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc |((c13 >> 8) & 0xff) |((c06 >> 16) & 0xff), 3, s_skb); - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; + const u32 d00 = (d >> 0) & 0x00003c3f; + const u32 d07 = (d >> 7) & 0x00003f03; + const u32 d21 = (d >> 21) & 0x0000000f; + const u32 d22 = (d >> 22) & 0x00000030; - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) + u32 t = BOX (((d00 >> 0) & 0xff), 4, s_skb) | BOX (((d07 >> 0) & 0xff) |((d00 >> 8) & 0xff), 5, s_skb) | BOX (((d07 >> 8) & 0xff), 6, s_skb) @@ -436,9 +412,9 @@ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], __loc } } -static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u32x Kd[16], __local u32 s_SPtrans[8][64]) +static void 
_des_crypt_encrypt (u32 iv[2], u32 mask, u32 rounds, u32 Kc[16], u32 Kd[16], __local u32 s_SPtrans[8][64]) { - u32x tt; + u32 tt; const u32 E0 = ((mask >> 0) & 0x003f) | ((mask >> 4) & 0x3f00); @@ -446,8 +422,8 @@ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u | ((mask >> 6) & 0xf000) | ((mask >> 22) & 0x0003); - u32x r = iv[0]; - u32x l = iv[1]; + u32 r = iv[0]; + u32 l = iv[1]; for (u32 i = 0; i < rounds; i++) { @@ -455,9 +431,9 @@ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u for (u32 j = 0; j < 16; j++) { /* sbox */ - u32x t = r ^ (r >> 16); + u32 t = r ^ (r >> 16); - u32x u = t; + u32 u = t; // u u = u & E0; @@ -479,8 +455,8 @@ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u t = rotl32 (t, 28u); t = t ^ Kd[j]; - const u32x um = u & 0x3f3f3f3f; - const u32x tm = t & 0x3f3f3f3f; + const u32 um = u & 0x3f3f3f3f; + const u32 tm = t & 0x3f3f3f3f; l ^= BOX (((um >> 0) & 0xff), 0, s_SPtrans) | BOX (((um >> 8) & 0xff), 2, s_SPtrans) @@ -548,7 +524,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12400_init (__gl * word */ - u32x w[16]; + u32 w[16]; w[ 0] = pws[gid].i[ 0]; w[ 1] = pws[gid].i[ 1]; @@ -776,12 +752,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12400_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].iv[0]; - const u32x r1 = tmps[gid].iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = tmps[gid].iv[0]; + const u32 r1 = tmps[gid].iv[1]; + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12500.cl b/OpenCL/m12500.cl similarity index 98% rename from amd/m12500.cl rename to OpenCL/m12500.cl index d628112..52a6149 100644 --- a/amd/m12500.cl +++ b/OpenCL/m12500.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define 
VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -27,11 +15,11 @@ #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #define ROUNDS 0x40000 @@ -869,30 +857,30 @@ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 ^ rdk[43]; } -static void sha1_transform (const u32x w[16], u32x digest[5]) +static void sha1_transform (const u32 w[16], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w[ 0]; + u32 w1_t = w[ 1]; + u32 w2_t = w[ 2]; + u32 w3_t = w[ 3]; + u32 w4_t = w[ 4]; + u32 w5_t = w[ 5]; + u32 w6_t = w[ 6]; + u32 w7_t = w[ 7]; + u32 w8_t = w[ 8]; + u32 w9_t = w[ 9]; + u32 wa_t = w[10]; + u32 wb_t = w[11]; + u32 wc_t = w[12]; + u32 wd_t = w[13]; + u32 we_t = w[14]; + u32 wf_t = w[15]; #undef K #define K SHA1C00 @@ -1199,7 +1187,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__gl const u32 p3 = (pw_len * 2) + salt_len + 3; - u32x w_buf[16]; + u32 w_buf[16]; w_buf[ 0] = 0x80000000; w_buf[ 1] = 0; @@ -1228,7 +1216,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__gl sha1_transform (w_buf, dgst); - u32x rk[60]; + u32 rk[60]; u32 data[4]; @@ -1237,7 +1225,7 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__gl data[2] = salt_bufs[salt_pos].salt_buf[4]; data[3] = salt_bufs[salt_pos].salt_buf[5]; - u32x ukeyx[4]; + u32 ukeyx[4]; ukeyx[0] = swap_workaround (dgst[0]); ukeyx[1] = swap_workaround (dgst[1]); @@ -1280,7 +1268,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__gl const u32 p3 = (pw_len * 2) + salt_len + 3; - u32x w[16]; + u32 w[16]; w[ 0] = 0; w[ 1] = 0; @@ -1339,12 +1327,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12500_comp (__gl out[2] ^= swap_workaround (iv[2]); out[3] ^= swap_workaround (iv[3]); - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = 0; - const u32x r3 = 0; + const u32 r0 = out[0]; + const u32 r1 = out[1]; + const u32 r2 = 0; + const u32 r3 = 0; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/m12600_a0.cl b/OpenCL/m12600_a0.cl similarity index 94% rename from amd/m12600_a0.cl rename to OpenCL/m12600_a0.cl index 200be91..231edec 100644 --- a/amd/m12600_a0.cl +++ b/OpenCL/m12600_a0.cl @@ -8,43 +8,19 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #include "include/rp_gpu.h" -#include "rp_amd.c" +#include "rp.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S 
"check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] @@ -72,14 +48,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -144,28 +120,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -180,31 +156,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); 
+ u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -427,12 +403,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -458,14 +434,14 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo const u32 gid = get_global_id (0); - u32x pw_buf0[4]; + u32 pw_buf0[4]; pw_buf0[0] = pws[gid].i[ 0]; pw_buf0[1] = pws[gid].i[ 1]; pw_buf0[2] = pws[gid].i[ 2]; pw_buf0[3] = pws[gid].i[ 3]; - u32x pw_buf1[4]; + u32 pw_buf1[4]; pw_buf1[0] = pws[gid].i[ 4]; pw_buf1[1] = pws[gid].i[ 5]; @@ -542,28 +518,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) { - u32x w0[4]; + u32 w0[4]; w0[0] = pw_buf0[0]; w0[1] = pw_buf0[1]; w0[2] = pw_buf0[2]; w0[3] = pw_buf0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pw_buf1[0]; w1[1] = pw_buf1[1]; w1[2] = pw_buf1[2]; w1[3] = pw_buf1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + 
u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -578,31 +554,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = out_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -826,12 +802,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - 
const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m12600_a1.cl b/OpenCL/m12600_a1.cl similarity index 94% rename from amd/m12600_a1.cl rename to OpenCL/m12600_a1.cl index 19f68b5..1c4430b 100644 --- a/amd/m12600_a1.cl +++ b/OpenCL/m12600_a1.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] @@ -70,28 +46,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -202,28 +178,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] = wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -234,31 +210,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = 
swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -482,12 +458,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } @@ -513,28 +489,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo const u32 gid = get_global_id (0); - u32x wordl0[4]; + u32 wordl0[4]; wordl0[0] = pws[gid].i[ 0]; wordl0[1] = pws[gid].i[ 1]; wordl0[2] = pws[gid].i[ 2]; wordl0[3] = pws[gid].i[ 3]; - u32x wordl1[4]; + u32 wordl1[4]; wordl1[0] = pws[gid].i[ 4]; wordl1[1] = pws[gid].i[ 5]; wordl1[2] = pws[gid].i[ 6]; wordl1[3] = pws[gid].i[ 7]; - u32x wordl2[4]; + u32 wordl2[4]; wordl2[0] = 0; wordl2[1] = 0; wordl2[2] = 0; wordl2[3] = 0; - u32x wordl3[4]; + u32 wordl3[4]; wordl3[0] = 0; wordl3[1] = 0; @@ -657,28 +633,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); } - u32x w0[4]; + u32 w0[4]; w0[0] = wordl0[0] | wordr0[0]; w0[1] = wordl0[1] | wordr0[1]; w0[2] = wordl0[2] | wordr0[2]; w0[3] = wordl0[3] | wordr0[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = wordl1[0] | wordr1[0]; w1[1] = wordl1[1] | wordr1[1]; w1[2] = wordl1[2] | wordr1[2]; w1[3] = wordl1[3] | wordr1[3]; - u32x w2[4]; + u32 w2[4]; w2[0] = wordl2[0] | wordr2[0]; w2[1] = wordl2[1] | wordr2[1]; w2[2] 
= wordl2[2] | wordr2[2]; w2[3] = wordl2[3] | wordr2[3]; - u32x w3[4]; + u32 w3[4]; w3[0] = wordl3[0] | wordr3[0]; w3[1] = wordl3[1] | wordr3[1]; @@ -689,31 +665,31 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo * sha1 */ - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t = swap_workaround (w0[0]); + u32 w1_t = swap_workaround (w0[1]); + u32 w2_t = swap_workaround (w0[2]); + u32 w3_t = swap_workaround (w0[3]); + u32 w4_t = swap_workaround (w1[0]); + u32 w5_t = swap_workaround (w1[1]); + u32 w6_t = swap_workaround (w1[2]); + u32 w7_t = swap_workaround (w1[3]); + u32 w8_t = swap_workaround (w2[0]); + u32 w9_t = swap_workaround (w2[1]); + u32 wa_t = swap_workaround (w2[2]); + u32 wb_t = swap_workaround (w2[3]); + u32 wc_t = swap_workaround (w3[0]); + u32 wd_t = swap_workaround (w3[1]); + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -937,12 +913,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, 
wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } diff --git a/amd/m12600_a3.cl b/OpenCL/m12600_a3.cl similarity index 92% rename from amd/m12600_a3.cl rename to OpenCL/m12600_a3.cl index ae31557..1dfc85e 100644 --- a/amd/m12600_a3.cl +++ b/OpenCL/m12600_a3.cl @@ -8,41 +8,17 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE4 -#endif - -#ifdef VLIW5 -#define VECT_SIZE4 -#endif - #define DGST_R0 3 #define DGST_R1 7 #define DGST_R2 2 #define DGST_R3 6 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif +#define COMPARE_S "check_single_comp4.c" +#define COMPARE_M "check_multi_comp4.c" #ifdef VECT_SIZE1 #define uint_to_hex_upper8(i) l_bin2asc[(i)] @@ -56,7 +32,7 @@ #define uint_to_hex_upper8(i) (u32x) (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) #endif -static void m12600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 
*bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m12600m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -84,7 +60,7 @@ static void m12600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -96,31 +72,31 @@ static void m12600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = 
w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -343,16 +319,16 @@ static void m12600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_M + #include COMPARE_M } } -static void m12600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, 
__global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) +static void m12600s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global u32 *bitmaps_buf_s1_a, __global u32 *bitmaps_buf_s1_b, __global u32 *bitmaps_buf_s1_c, __global u32 *bitmaps_buf_s1_d, __global u32 *bitmaps_buf_s2_a, __global u32 *bitmaps_buf_s2_b, __global u32 *bitmaps_buf_s2_c, __global u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global digest_t *digests_buf, __global u32 *hashes_shown, __global salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 l_bin2asc[256]) { /** * modifier @@ -392,7 +368,7 @@ static void m12600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * loop */ - u32x w0l = w0[0]; + u32 w0l = w0[0]; for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) { @@ -404,31 +380,31 @@ static void m12600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p * sha1 */ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; + u32 w0_t 
= w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = 0; + u32 wf_t = pw_len * 8; + + u32 a = SHA1M_A; + u32 b = SHA1M_B; + u32 c = SHA1M_C; + u32 d = SHA1M_D; + u32 e = SHA1M_E; + u32 f = 0; + u32 g = 0; + u32 h = 0; #undef K #define K SHA1C00 @@ -651,12 +627,12 @@ static void m12600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 p we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; + const u32 r0 = d; + const u32 r1 = h; + const u32 r2 = c; + const u32 r3 = g; - #include VECT_COMPARE_S + #include COMPARE_S } } @@ -675,28 +651,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -761,28 +737,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ 
-847,28 +823,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_m16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -933,28 +909,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s04 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1019,28 +995,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s08 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -1105,28 +1081,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12600_s16 (__glo const u32 lid = get_local_id (0); - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] 
= pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; diff --git a/amd/m12700.cl b/OpenCL/m12700.cl similarity index 97% rename from amd/m12700.cl rename to OpenCL/m12700.cl index 5bbe450..2c56350 100644 --- a/amd/m12700.cl +++ b/OpenCL/m12700.cl @@ -8,33 +8,21 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 #define DGST_R3 3 #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" +#define COMPARE_M "check_multi_vect2_comp4.c" #endif __constant u32 te0[256] = @@ -910,30 +898,30 @@ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, __local u32 ^ rdk[59]; } -static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) +static void sha1_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[5]) { - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 A = digest[0]; + u32 B = digest[1]; + u32 C = digest[2]; + u32 D = digest[3]; + u32 E = digest[4]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = 
w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #undef K #define K SHA1C00 @@ -1038,7 +1026,7 @@ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4] digest[4] += E; } -static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) +static void hmac_sha1_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -1091,7 +1079,7 @@ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x sha1_transform (w0, w1, w2, w3, opad); } -static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) +static void hmac_sha1_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[5], u32 opad[5], u32 digest[5]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -1137,28 +1125,28 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_init (__gl if (gid >= gid_max) return; - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[ 0]; w0[1] = pws[gid].i[ 1]; w0[2] = pws[gid].i[ 2]; w0[3] = pws[gid].i[ 3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[ 4]; w1[1] = pws[gid].i[ 5]; w1[2] = pws[gid].i[ 6]; w1[3] = pws[gid].i[ 7]; - u32x w2[4]; + u32 w2[4]; w2[0] = pws[gid].i[ 8]; w2[1] = pws[gid].i[ 9]; w2[2] = pws[gid].i[10]; w2[3] = pws[gid].i[11]; - u32x w3[4]; + u32 w3[4]; w3[0] = pws[gid].i[12]; w3[1] = pws[gid].i[13]; @@ -1199,8 +1187,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_init (__gl w3[2] = swap_workaround (w3[2]); w3[3] = swap_workaround (w3[3]); - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); @@ -1236,7 +1224,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) 
m12700_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst1[5]; + u32 dgst1[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst1); @@ -1273,7 +1261,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_init (__gl w3[2] = 0; w3[3] = (64 + salt_len + 4) * 8; - u32x dgst2[5]; + u32 dgst2[5]; hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst2); @@ -1297,8 +1285,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_loop (__gl if (gid >= gid_max) return; - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; ipad[0] = tmps[gid].ipad[0]; ipad[1] = tmps[gid].ipad[1]; @@ -1315,8 +1303,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_loop (__gl // first 160 bits { - u32x dgst1[5]; - u32x out1[5]; + u32 dgst1[5]; + u32 out1[5]; dgst1[0] = tmps[gid].dgst1[0]; dgst1[1] = tmps[gid].dgst1[1]; @@ -1332,10 +1320,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst1[0]; w0[1] = dgst1[1]; @@ -1379,8 +1367,8 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_loop (__gl // second 160 bits { - u32x dgst2[5]; - u32x out2[5]; + u32 dgst2[5]; + u32 out2[5]; dgst2[0] = tmps[gid].dgst2[0]; dgst2[1] = tmps[gid].dgst2[1]; @@ -1396,10 +1384,10 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_loop (__gl for (u32 j = 0; j < loop_cnt; j++) { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; w0[0] = dgst2[0]; w0[1] = dgst2[1]; @@ -1542,7 +1530,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12700_comp (__gl salt_bufs[salt_pos].salt_buf[7] }; - u32x ukey[8]; + u32 ukey[8]; ukey[0] = tmps[gid].out1[0]; ukey[1] = tmps[gid].out1[1]; @@ -1588,13 +1576,13 @@ __kernel void __attribute__((reqd_work_group_size 
(64, 1, 1))) m12700_comp (__gl if (pt[i + 4] != 'd') continue; if (pt[i + 5] != '"') continue; - const u32x r0 = data[0]; - const u32x r1 = data[1]; - const u32x r2 = data[2]; - const u32x r3 = data[3]; + const u32 r0 = data[0]; + const u32 r1 = data[1]; + const u32 r2 = data[2]; + const u32 r3 = data[3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } } diff --git a/amd/m12800.cl b/OpenCL/m12800.cl similarity index 92% rename from amd/m12800.cl rename to OpenCL/m12800.cl index 2877018..a42d812 100644 --- a/amd/m12800.cl +++ b/OpenCL/m12800.cl @@ -8,18 +8,6 @@ #include "include/constants.h" #include "include/kernel_vendor.h" -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW4 -#define VECT_SIZE1 -#endif - -#ifdef VLIW5 -#define VECT_SIZE1 -#endif - #define DGST_R0 0 #define DGST_R1 1 #define DGST_R2 2 @@ -27,11 +15,11 @@ #include "include/kernel_functions.c" -#include "types_amd.c" -#include "common_amd.c" +#include "types_ocl.c" +#include "common.c" #ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" +#define COMPARE_M "check_multi_vect1_comp4.c" #endif #ifdef VECT_SIZE1 @@ -58,12 +46,12 @@ __constant u32 k_sha256[64] = SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, }; -static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) +static void md4_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[4]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); @@ -122,33 +110,33 @@ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], digest[3] += d; } -static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) +static void 
sha256_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[8]) { - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; + u32 a = digest[0]; + u32 b = digest[1]; + u32 c = digest[2]; + u32 d = digest[3]; + u32 e = digest[4]; + u32 f = digest[5]; + u32 g = digest[6]; + u32 h = digest[7]; + + u32 w0_t = w0[0]; + u32 w1_t = w0[1]; + u32 w2_t = w0[2]; + u32 w3_t = w0[3]; + u32 w4_t = w1[0]; + u32 w5_t = w1[1]; + u32 w6_t = w1[2]; + u32 w7_t = w1[3]; + u32 w8_t = w2[0]; + u32 w9_t = w2[1]; + u32 wa_t = w2[2]; + u32 wb_t = w2[3]; + u32 wc_t = w3[0]; + u32 wd_t = w3[1]; + u32 we_t = w3[2]; + u32 wf_t = w3[3]; #define ROUND_EXPAND() \ { \ @@ -208,7 +196,7 @@ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[ digest[7] += h; } -static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) +static void hmac_sha256_pad (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8]) { w0[0] = w0[0] ^ 0x36363636; w0[1] = w0[1] ^ 0x36363636; @@ -267,7 +255,7 @@ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 sha256_transform (w0, w1, w2, w3, opad); } -static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) +static void hmac_sha256_run (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 ipad[8], u32 opad[8], u32 digest[8]) { digest[0] = ipad[0]; digest[1] = ipad[1]; @@ -357,28 +345,28 @@ __kernel void 
__attribute__((reqd_work_group_size (64, 1, 1))) m12800_init (__gl * base */ - u32x w0[4]; + u32 w0[4]; w0[0] = pws[gid].i[0]; w0[1] = pws[gid].i[1]; w0[2] = pws[gid].i[2]; w0[3] = pws[gid].i[3]; - u32x w1[4]; + u32 w1[4]; w1[0] = pws[gid].i[4]; w1[1] = pws[gid].i[5]; w1[2] = 0; w1[3] = 0; - u32x w2[4]; + u32 w2[4]; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; - u32x w3[4]; + u32 w3[4]; w3[0] = 0; w3[1] = 0; @@ -432,7 +420,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12800_init (__gl w3[2] = pw_len * 2 * 8; - u32x digest_md4[4]; + u32 digest_md4[4]; digest_md4[0] = MD4M_A; digest_md4[1] = MD4M_B; @@ -647,12 +635,12 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) m12800_comp (__gl const u32 lid = get_local_id (0); - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; + const u32 r0 = tmps[gid].out[DGST_R0]; + const u32 r1 = tmps[gid].out[DGST_R1]; + const u32 r2 = tmps[gid].out[DGST_R2]; + const u32 r3 = tmps[gid].out[DGST_R3]; #define il_pos 0 - #include VECT_COMPARE_M + #include COMPARE_M } diff --git a/amd/markov_be_v1.cl b/OpenCL/markov_be.cl similarity index 81% rename from amd/markov_be_v1.cl rename to OpenCL/markov_be.cl index 1fad7bc..e0b39de 100644 --- a/amd/markov_be_v1.cl +++ b/OpenCL/markov_be.cl @@ -7,9 +7,7 @@ #define CHARSIZ 256 -#define VECT_SIZE1 - -#include "types_amd.c" +#include "types_ocl.c" static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) { @@ -63,7 +61,7 @@ static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global c if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t 
*markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) { const u32 gid = get_global_id (0); @@ -93,7 +91,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__glob pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) { const u32 gid = get_global_id (0); @@ -106,7 +104,7 @@ __kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__glob pws_buf_r[gid].i = pw_buf[0]; } -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) +__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) { const u32 gid = get_global_id (0); diff --git a/amd/markov_le_v1.cl b/OpenCL/markov_le.cl similarity index 98% rename from amd/markov_le_v1.cl 
rename to OpenCL/markov_le.cl index 1b833b5..e56a909 100644 --- a/amd/markov_le_v1.cl +++ b/OpenCL/markov_le.cl @@ -7,9 +7,7 @@ #define CHARSIZ 256 -#define VECT_SIZE1 - -#include "types_amd.c" +#include "types_ocl.c" static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) { diff --git a/nv/rp_nv.c b/OpenCL/rp.c similarity index 71% rename from nv/rp_nv.c rename to OpenCL/rp.c index f63a1f7..8782240 100644 --- a/nv/rp_nv.c +++ b/OpenCL/rp.c @@ -3,18 +3,18 @@ * License.....: MIT */ -__device__ static u32x generate_cmask (u32x buf) +static u32 generate_cmask (u32 buf) { - const u32x rmask = ((buf & 0x40404040) >> 1) - & ~((buf & 0x80808080) >> 2); + const u32 rmask = ((buf & 0x40404040) >> 1) + & ~((buf & 0x80808080) >> 2); - const u32x hmask = (buf & 0x1f1f1f1f) + 0x05050505; - const u32x lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f; + const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505; + const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f; return rmask & ~hmask & lmask; } -__device__ static void truncate_right (u32x w0[4], u32x w1[4], const u32 len) +static void truncate_right (u32 w0[4], u32 w1[4], const u32 len) { const u32 tmp = (1 << ((len % 4) * 8)) - 1; @@ -67,7 +67,7 @@ __device__ static void truncate_right (u32x w0[4], u32x w1[4], const u32 len) } } -__device__ static void truncate_left (u32x w0[4], u32x w1[4], const u32 len) +static void truncate_left (u32 w0[4], u32 w1[4], const u32 len) { const u32 tmp = ~((1 << ((len % 4) * 8)) - 1); @@ -120,10 +120,9 @@ __device__ static void truncate_left (u32x w0[4], u32x w1[4], const u32 len) } } -__device__ static void lshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4]) +static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV out0[0] = __byte_perm (in0[0], in0[1], 0x4321); 
out0[1] = __byte_perm (in0[1], in0[2], 0x4321); out0[2] = __byte_perm (in0[2], in0[3], 0x4321); @@ -132,25 +131,23 @@ __device__ static void lshift_block (const u32x in0[4], const u32x in1[4], u32x out1[1] = __byte_perm (in1[1], in1[2], 0x4321); out1[2] = __byte_perm (in1[2], in1[3], 0x4321); out1[3] = __byte_perm (in1[3], 0, 0x4321); + #endif - #else - - out0[0] = in0[0] >> 8 | in0[1] << 24; - out0[1] = in0[1] >> 8 | in0[2] << 24; - out0[2] = in0[2] >> 8 | in0[3] << 24; - out0[3] = in0[3] >> 8 | in1[0] << 24; - out1[0] = in1[0] >> 8 | in1[1] << 24; - out1[1] = in1[1] >> 8 | in1[2] << 24; - out1[2] = in1[2] >> 8 | in1[3] << 24; - out1[3] = in1[3] >> 8; - + #ifdef IS_AMD + out0[0] = amd_bytealign (in0[1], in0[0], 1); + out0[1] = amd_bytealign (in0[2], in0[1], 1); + out0[2] = amd_bytealign (in0[3], in0[2], 1); + out0[3] = amd_bytealign (in1[0], in0[3], 1); + out1[0] = amd_bytealign (in1[1], in1[0], 1); + out1[1] = amd_bytealign (in1[2], in1[1], 1); + out1[2] = amd_bytealign (in1[3], in1[2], 1); + out1[3] = amd_bytealign ( 0, in1[3], 1); #endif } -__device__ static void rshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4]) +static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV out1[3] = __byte_perm (in1[2], in1[3], 0x6543); out1[2] = __byte_perm (in1[1], in1[2], 0x6543); out1[1] = __byte_perm (in1[0], in1[1], 0x6543); @@ -159,25 +156,23 @@ __device__ static void rshift_block (const u32x in0[4], const u32x in1[4], u32x out0[2] = __byte_perm (in0[1], in0[2], 0x6543); out0[1] = __byte_perm (in0[0], in0[1], 0x6543); out0[0] = __byte_perm ( 0, in0[0], 0x6543); + #endif - #else - - out1[3] = in1[3] << 8 | in1[2] >> 24; - out1[2] = in1[2] << 8 | in1[1] >> 24; - out1[1] = in1[1] << 8 | in1[0] >> 24; - out1[0] = in1[0] << 8 | in0[3] >> 24; - out0[3] = in0[3] << 8 | in0[2] >> 24; - out0[2] = in0[2] << 8 | in0[1] >> 24; - out0[1] = in0[1] << 8 | in0[0] >> 24; - out0[0] = 
in0[0] << 8; - + #ifdef IS_AMD + out1[3] = amd_bytealign (in1[3], in1[2], 3); + out1[2] = amd_bytealign (in1[2], in1[1], 3); + out1[1] = amd_bytealign (in1[1], in1[0], 3); + out1[0] = amd_bytealign (in1[0], in0[3], 3); + out0[3] = amd_bytealign (in0[3], in0[2], 3); + out0[2] = amd_bytealign (in0[2], in0[1], 3); + out0[1] = amd_bytealign (in0[1], in0[0], 3); + out0[0] = amd_bytealign (in0[0], 0, 3); #endif } -__device__ static void rshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num) +static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) { - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV switch (num) { case 0: out1[3] = in1[3]; @@ -469,308 +464,306 @@ __device__ static void rshift_block_N (const u32x in0[4], const u32x in1[4], u32 out0[0] = 0; break; } + #endif - #else - + #ifdef IS_AMD switch (num) { - case 0: out1[3] = in1[3]; - out1[2] = in1[2]; - out1[1] = in1[1]; - out1[0] = in1[0]; - out0[3] = in0[3]; - out0[2] = in0[2]; + case 0: out0[0] = in0[0]; out0[1] = in0[1]; - out0[0] = in0[0]; - break; - case 1: out1[3] = in1[3] << 8 | in1[2] >> 24; - out1[2] = in1[2] << 8 | in1[1] >> 24; - out1[1] = in1[1] << 8 | in1[0] >> 24; - out1[0] = in1[0] << 8 | in0[3] >> 24; - out0[3] = in0[3] << 8 | in0[2] >> 24; - out0[2] = in0[2] << 8 | in0[1] >> 24; - out0[1] = in0[1] << 8 | in0[0] >> 24; - out0[0] = in0[0] << 8; - break; - case 2: out1[3] = in1[3] << 16 | in1[2] >> 16; - out1[2] = in1[2] << 16 | in1[1] >> 16; - out1[1] = in1[1] << 16 | in1[0] >> 16; - out1[0] = in1[0] << 16 | in0[3] >> 16; - out0[3] = in0[3] << 16 | in0[2] >> 16; - out0[2] = in0[2] << 16 | in0[1] >> 16; - out0[1] = in0[1] << 16 | in0[0] >> 16; - out0[0] = in0[0] << 16; - break; - case 3: out1[3] = in1[3] << 24 | in1[2] >> 8; - out1[2] = in1[2] << 24 | in1[1] >> 8; - out1[1] = in1[1] << 24 | in1[0] >> 8; - out1[0] = in1[0] << 24 | in0[3] >> 8; - out0[3] = in0[3] << 24 | in0[2] >> 8; - out0[2] = in0[2] << 24 | in0[1] 
>> 8; - out0[1] = in0[1] << 24 | in0[0] >> 8; - out0[0] = in0[0] << 24; - break; - case 4: out1[3] = in1[2]; - out1[2] = in1[1]; - out1[1] = in1[0]; - out1[0] = in0[3]; - out0[3] = in0[2]; - out0[2] = in0[1]; - out0[1] = in0[0]; - out0[0] = 0; - break; - case 5: out1[3] = in1[2] << 8 | in1[1] >> 24; - out1[2] = in1[1] << 8 | in1[0] >> 24; - out1[1] = in1[0] << 8 | in0[3] >> 24; - out1[0] = in0[3] << 8 | in0[2] >> 24; - out0[3] = in0[2] << 8 | in0[1] >> 24; - out0[2] = in0[1] << 8 | in0[0] >> 24; - out0[1] = in0[0] << 8; - out0[0] = 0; - break; - case 6: out1[3] = in1[2] << 16 | in1[1] >> 16; - out1[2] = in1[1] << 16 | in1[0] >> 16; - out1[1] = in1[0] << 16 | in0[3] >> 16; - out1[0] = in0[3] << 16 | in0[2] >> 16; - out0[3] = in0[2] << 16 | in0[1] >> 16; - out0[2] = in0[1] << 16 | in0[0] >> 16; - out0[1] = in0[0] << 16; - out0[0] = 0; + out0[2] = in0[2]; + out0[3] = in0[3]; + out1[0] = in1[0]; + out1[1] = in1[1]; + out1[2] = in1[2]; + out1[3] = in1[3]; break; - case 7: out1[3] = in1[2] << 24 | in1[1] >> 8; - out1[2] = in1[1] << 24 | in1[0] >> 8; - out1[1] = in1[0] << 24 | in0[3] >> 8; - out1[0] = in0[3] << 24 | in0[2] >> 8; - out0[3] = in0[2] << 24 | in0[1] >> 8; - out0[2] = in0[1] << 24 | in0[0] >> 8; - out0[1] = in0[0] << 24; - out0[0] = 0; + case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1); + out0[1] = amd_bytealign (in0[2], in0[1], 1); + out0[2] = amd_bytealign (in0[3], in0[2], 1); + out0[3] = amd_bytealign (in1[0], in0[3], 1); + out1[0] = amd_bytealign (in1[1], in1[0], 1); + out1[1] = amd_bytealign (in1[2], in1[1], 1); + out1[2] = amd_bytealign (in1[3], in1[2], 1); + out1[3] = amd_bytealign ( 0, in1[3], 1); + break; + case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2); + out0[1] = amd_bytealign (in0[2], in0[1], 2); + out0[2] = amd_bytealign (in0[3], in0[2], 2); + out0[3] = amd_bytealign (in1[0], in0[3], 2); + out1[0] = amd_bytealign (in1[1], in1[0], 2); + out1[1] = amd_bytealign (in1[2], in1[1], 2); + out1[2] = amd_bytealign (in1[3], in1[2], 2); + out1[3] = 
amd_bytealign ( 0, in1[3], 2); + break; + case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3); + out0[1] = amd_bytealign (in0[2], in0[1], 3); + out0[2] = amd_bytealign (in0[3], in0[2], 3); + out0[3] = amd_bytealign (in1[0], in0[3], 3); + out1[0] = amd_bytealign (in1[1], in1[0], 3); + out1[1] = amd_bytealign (in1[2], in1[1], 3); + out1[2] = amd_bytealign (in1[3], in1[2], 3); + out1[3] = amd_bytealign ( 0, in1[3], 3); break; - case 8: out1[3] = in1[1]; - out1[2] = in1[0]; - out1[1] = in0[3]; - out1[0] = in0[2]; - out0[3] = in0[1]; - out0[2] = in0[0]; - out0[1] = 0; - out0[0] = 0; + case 4: out0[0] = in0[1]; + out0[1] = in0[2]; + out0[2] = in0[3]; + out0[3] = in1[0]; + out1[0] = in1[1]; + out1[1] = in1[2]; + out1[2] = in1[3]; + out1[3] = 0; break; - case 9: out1[3] = in1[1] << 8 | in1[0] >> 24; - out1[2] = in1[0] << 8 | in0[3] >> 24; - out1[1] = in0[3] << 8 | in0[2] >> 24; - out1[0] = in0[2] << 8 | in0[1] >> 24; - out0[3] = in0[1] << 8 | in0[0] >> 24; - out0[2] = in0[0] << 8; - out0[1] = 0; - out0[0] = 0; + case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1); + out0[1] = amd_bytealign (in0[3], in0[2], 1); + out0[2] = amd_bytealign (in1[0], in0[3], 1); + out0[3] = amd_bytealign (in1[1], in1[0], 1); + out1[0] = amd_bytealign (in1[2], in1[1], 1); + out1[1] = amd_bytealign (in1[3], in1[2], 1); + out1[2] = amd_bytealign ( 0, in1[3], 1); + out1[3] = 0; break; - case 10: out1[3] = in1[1] << 16 | in1[0] >> 16; - out1[2] = in1[0] << 16 | in0[3] >> 16; - out1[1] = in0[3] << 16 | in0[2] >> 16; - out1[0] = in0[2] << 16 | in0[1] >> 16; - out0[3] = in0[1] << 16 | in0[0] >> 16; - out0[2] = in0[0] << 16; - out0[1] = 0; - out0[0] = 0; + case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2); + out0[1] = amd_bytealign (in0[3], in0[2], 2); + out0[2] = amd_bytealign (in1[0], in0[3], 2); + out0[3] = amd_bytealign (in1[1], in1[0], 2); + out1[0] = amd_bytealign (in1[2], in1[1], 2); + out1[1] = amd_bytealign (in1[3], in1[2], 2); + out1[2] = amd_bytealign ( 0, in1[3], 2); + out1[3] = 0; break; - 
case 11: out1[3] = in1[1] << 24 | in1[0] >> 8; - out1[2] = in1[0] << 24 | in0[3] >> 8; - out1[1] = in0[3] << 24 | in0[2] >> 8; - out1[0] = in0[2] << 24 | in0[1] >> 8; - out0[3] = in0[1] << 24 | in0[0] >> 8; - out0[2] = in0[0] << 24; - out0[1] = 0; - out0[0] = 0; + case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3); + out0[1] = amd_bytealign (in0[3], in0[2], 3); + out0[2] = amd_bytealign (in1[0], in0[3], 3); + out0[3] = amd_bytealign (in1[1], in1[0], 3); + out1[0] = amd_bytealign (in1[2], in1[1], 3); + out1[1] = amd_bytealign (in1[3], in1[2], 3); + out1[2] = amd_bytealign ( 0, in1[3], 3); + out1[3] = 0; break; - case 12: out1[3] = in1[0]; - out1[2] = in0[3]; - out1[1] = in0[2]; - out1[0] = in0[1]; - out0[3] = in0[0]; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 8: out0[0] = in0[2]; + out0[1] = in0[3]; + out0[2] = in1[0]; + out0[3] = in1[1]; + out1[0] = in1[2]; + out1[1] = in1[3]; + out1[2] = 0; + out1[3] = 0; break; - case 13: out1[3] = in1[0] << 8 | in0[3] >> 24; - out1[2] = in0[3] << 8 | in0[2] >> 24; - out1[1] = in0[2] << 8 | in0[1] >> 24; - out1[0] = in0[1] << 8 | in0[0] >> 24; - out0[3] = in0[0] << 8; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1); + out0[1] = amd_bytealign (in1[0], in0[3], 1); + out0[2] = amd_bytealign (in1[1], in1[0], 1); + out0[3] = amd_bytealign (in1[2], in1[1], 1); + out1[0] = amd_bytealign (in1[3], in1[2], 1); + out1[1] = amd_bytealign ( 0, in1[3], 1); + out1[2] = 0; + out1[3] = 0; break; - case 14: out1[3] = in1[0] << 16 | in0[3] >> 16; - out1[2] = in0[3] << 16 | in0[2] >> 16; - out1[1] = in0[2] << 16 | in0[1] >> 16; - out1[0] = in0[1] << 16 | in0[0] >> 16; - out0[3] = in0[0] << 16; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2); + out0[1] = amd_bytealign (in1[0], in0[3], 2); + out0[2] = amd_bytealign (in1[1], in1[0], 2); + out0[3] = amd_bytealign (in1[2], in1[1], 2); + out1[0] = amd_bytealign (in1[3], in1[2], 2); + out1[1] = 
amd_bytealign ( 0, in1[3], 2); + out1[2] = 0; + out1[3] = 0; break; - case 15: out1[3] = in1[0] << 24 | in0[3] >> 8; - out1[2] = in0[3] << 24 | in0[2] >> 8; - out1[1] = in0[2] << 24 | in0[1] >> 8; - out1[0] = in0[1] << 24 | in0[0] >> 8; - out0[3] = in0[0] << 24; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3); + out0[1] = amd_bytealign (in1[0], in0[3], 3); + out0[2] = amd_bytealign (in1[1], in1[0], 3); + out0[3] = amd_bytealign (in1[2], in1[1], 3); + out1[0] = amd_bytealign (in1[3], in1[2], 3); + out1[1] = amd_bytealign ( 0, in1[3], 3); + out1[2] = 0; + out1[3] = 0; break; - case 16: out1[3] = in0[3]; - out1[2] = in0[2]; - out1[1] = in0[1]; - out1[0] = in0[0]; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 12: out0[0] = in0[3]; + out0[1] = in1[0]; + out0[2] = in1[1]; + out0[3] = in1[2]; + out1[0] = in1[3]; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 17: out1[3] = in0[3] << 8 | in0[2] >> 24; - out1[2] = in0[2] << 8 | in0[1] >> 24; - out1[1] = in0[1] << 8 | in0[0] >> 24; - out1[0] = in0[0] << 8; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1); + out0[1] = amd_bytealign (in1[1], in1[0], 1); + out0[2] = amd_bytealign (in1[2], in1[1], 1); + out0[3] = amd_bytealign (in1[3], in1[2], 1); + out1[0] = amd_bytealign ( 0, in1[3], 1); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 18: out1[3] = in0[3] << 16 | in0[2] >> 16; - out1[2] = in0[2] << 16 | in0[1] >> 16; - out1[1] = in0[1] << 16 | in0[0] >> 16; - out1[0] = in0[0] << 16; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2); + out0[1] = amd_bytealign (in1[1], in1[0], 2); + out0[2] = amd_bytealign (in1[2], in1[1], 2); + out0[3] = amd_bytealign (in1[3], in1[2], 2); + out1[0] = amd_bytealign ( 0, in1[3], 2); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 19: out1[3] = in0[3] << 24 | 
in0[2] >> 8; - out1[2] = in0[2] << 24 | in0[1] >> 8; - out1[1] = in0[1] << 24 | in0[0] >> 8; - out1[0] = in0[0] << 24; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3); + out0[1] = amd_bytealign (in1[1], in1[0], 3); + out0[2] = amd_bytealign (in1[2], in1[1], 3); + out0[3] = amd_bytealign (in1[3], in1[2], 3); + out1[0] = amd_bytealign ( 0, in1[3], 3); + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 20: out1[3] = in0[2]; - out1[2] = in0[1]; - out1[1] = in0[0]; + case 16: out0[0] = in1[0]; + out0[1] = in1[1]; + out0[2] = in1[2]; + out0[3] = in1[3]; out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 21: out1[3] = in0[2] << 8 | in0[1] >> 24; - out1[2] = in0[1] << 8 | in0[0] >> 24; - out1[1] = in0[0] << 8; + case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1); + out0[1] = amd_bytealign (in1[2], in1[1], 1); + out0[2] = amd_bytealign (in1[3], in1[2], 1); + out0[3] = amd_bytealign ( 0, in1[3], 1); out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 22: out1[3] = in0[2] << 16 | in0[1] >> 16; - out1[2] = in0[1] << 16 | in0[0] >> 16; - out1[1] = in0[0] << 16; + case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2); + out0[1] = amd_bytealign (in1[2], in1[1], 2); + out0[2] = amd_bytealign (in1[3], in1[2], 2); + out0[3] = amd_bytealign ( 0, in1[3], 2); out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 23: out1[3] = in0[2] << 24 | in0[1] >> 8; - out1[2] = in0[1] << 24 | in0[0] >> 8; - out1[1] = in0[0] << 24; + case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3); + out0[1] = amd_bytealign (in1[2], in1[1], 3); + out0[2] = amd_bytealign (in1[3], in1[2], 3); + out0[3] = amd_bytealign ( 0, in1[3], 3); out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 
0; - break; - case 24: out1[3] = in0[1]; - out1[2] = in0[0]; out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 25: out1[3] = in0[1] << 8 | in0[0] >> 24; - out1[2] = in0[0] << 8; - out1[1] = 0; - out1[0] = 0; + case 20: out0[0] = in1[1]; + out0[1] = in1[2]; + out0[2] = in1[3]; out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 26: out1[3] = in0[1] << 16 | in0[0] >> 16; - out1[2] = in0[0] << 16; - out1[1] = 0; out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 27: out1[3] = in0[1] << 24 | in0[0] >> 8; - out1[2] = in0[0] << 24; out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 28: out1[3] = in0[0]; out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out1[3] = 0; break; - case 29: out1[3] = in0[0] << 8; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; + case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1); + out0[1] = amd_bytealign (in1[3], in1[2], 1); + out0[2] = amd_bytealign ( 0, in1[3], 1); out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 30: out1[3] = in0[0] << 16; + out1[0] = 0; + out1[1] = 0; out1[2] = 0; + out1[3] = 0; + break; + case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2); + out0[1] = amd_bytealign (in1[3], in1[2], 2); + out0[2] = amd_bytealign ( 0, in1[3], 2); + out0[3] = 0; + out1[0] = 0; out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3); + out0[1] = amd_bytealign (in1[3], in1[2], 3); + out0[2] = amd_bytealign ( 0, in1[3], 3); + out0[3] = 0; out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 24: out0[0] = in1[2]; + out0[1] = in1[3]; + out0[2] = 0; out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1); + out0[1] = 
amd_bytealign ( 0, in1[3], 1); out0[2] = 0; - out0[1] = 0; - out0[0] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; - case 31: out1[3] = in0[0] << 24; + case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2); + out0[1] = amd_bytealign ( 0, in1[3], 2); + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; out1[2] = 0; + out1[3] = 0; + break; + case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3); + out0[1] = amd_bytealign ( 0, in1[3], 3); + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 28: out0[0] = in1[3]; + out0[1] = 0; + out0[2] = 0; + out0[3] = 0; out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 29: out0[0] = amd_bytealign ( 0, in1[3], 1); + out0[1] = 0; + out0[2] = 0; out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 30: out0[0] = amd_bytealign ( 0, in1[3], 2); + out0[1] = 0; out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; + break; + case 31: out0[0] = amd_bytealign ( 0, in1[3], 3); out0[1] = 0; - out0[0] = 0; + out0[2] = 0; + out0[3] = 0; + out1[0] = 0; + out1[1] = 0; + out1[2] = 0; + out1[3] = 0; break; } - #endif } -__device__ static void lshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num) +static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) { - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV switch (num) { case 0: out0[0] = in0[0]; @@ -1063,308 +1056,306 @@ __device__ static void lshift_block_N (const u32x in0[4], const u32x in1[4], u32 out1[3] = 0; break; } + #endif - #else - + #ifdef IS_AMD switch (num) { - case 0: out0[0] = in0[0]; - out0[1] = in0[1]; - out0[2] = in0[2]; - out0[3] = in0[3]; - out1[0] = in1[0]; - out1[1] = in1[1]; + case 0: out1[3] = in1[3]; out1[2] = in1[2]; - out1[3] = in1[3]; - break; - case 1: out0[0] = in0[0] >> 8 | in0[1] << 24; - out0[1] = 
in0[1] >> 8 | in0[2] << 24; - out0[2] = in0[2] >> 8 | in0[3] << 24; - out0[3] = in0[3] >> 8 | in1[0] << 24; - out1[0] = in1[0] >> 8 | in1[1] << 24; - out1[1] = in1[1] >> 8 | in1[2] << 24; - out1[2] = in1[2] >> 8 | in1[3] << 24; - out1[3] = in1[3] >> 8; - break; - case 2: out0[0] = in0[0] >> 16 | in0[1] << 16; - out0[1] = in0[1] >> 16 | in0[2] << 16; - out0[2] = in0[2] >> 16 | in0[3] << 16; - out0[3] = in0[3] >> 16 | in1[0] << 16; - out1[0] = in1[0] >> 16 | in1[1] << 16; - out1[1] = in1[1] >> 16 | in1[2] << 16; - out1[2] = in1[2] >> 16 | in1[3] << 16; - out1[3] = in1[3] >> 16; - break; - case 3: out0[0] = in0[0] >> 24 | in0[1] << 8; - out0[1] = in0[1] >> 24 | in0[2] << 8; - out0[2] = in0[2] >> 24 | in0[3] << 8; - out0[3] = in0[3] >> 24 | in1[0] << 8; - out1[0] = in1[0] >> 24 | in1[1] << 8; - out1[1] = in1[1] >> 24 | in1[2] << 8; - out1[2] = in1[2] >> 24 | in1[3] << 8; - out1[3] = in1[3] >> 24; - break; - case 4: out0[0] = in0[1]; - out0[1] = in0[2]; - out0[2] = in0[3]; - out0[3] = in1[0]; - out1[0] = in1[1]; - out1[1] = in1[2]; - out1[2] = in1[3]; - out1[3] = 0; - break; - case 5: out0[0] = in0[1] >> 8 | in0[2] << 24; - out0[1] = in0[2] >> 8 | in0[3] << 24; - out0[2] = in0[3] >> 8 | in1[0] << 24; - out0[3] = in1[0] >> 8 | in1[1] << 24; - out1[0] = in1[1] >> 8 | in1[2] << 24; - out1[1] = in1[2] >> 8 | in1[3] << 24; - out1[2] = in1[3] >> 8; - out1[3] = 0; - break; - case 6: out0[0] = in0[1] >> 16 | in0[2] << 16; - out0[1] = in0[2] >> 16 | in0[3] << 16; - out0[2] = in0[3] >> 16 | in1[0] << 16; - out0[3] = in1[0] >> 16 | in1[1] << 16; - out1[0] = in1[1] >> 16 | in1[2] << 16; - out1[1] = in1[2] >> 16 | in1[3] << 16; - out1[2] = in1[3] >> 16; - out1[3] = 0; - break; - case 7: out0[0] = in0[1] >> 24 | in0[2] << 8; - out0[1] = in0[2] >> 24 | in0[3] << 8; - out0[2] = in0[3] >> 24 | in1[0] << 8; - out0[3] = in1[0] >> 24 | in1[1] << 8; - out1[0] = in1[1] >> 24 | in1[2] << 8; - out1[1] = in1[2] >> 24 | in1[3] << 8; - out1[2] = in1[3] >> 24; - out1[3] = 0; - break; - case 8: 
out0[0] = in0[2]; - out0[1] = in0[3]; - out0[2] = in1[0]; - out0[3] = in1[1]; - out1[0] = in1[2]; - out1[1] = in1[3]; - out1[2] = 0; - out1[3] = 0; - break; - case 9: out0[0] = in0[2] >> 8 | in0[3] << 24; - out0[1] = in0[3] >> 8 | in1[0] << 24; - out0[2] = in1[0] >> 8 | in1[1] << 24; - out0[3] = in1[1] >> 8 | in1[2] << 24; - out1[0] = in1[2] >> 8 | in1[3] << 24; - out1[1] = in1[3] >> 8; - out1[2] = 0; - out1[3] = 0; - break; - case 10: out0[0] = in0[2] >> 16 | in0[3] << 16; - out0[1] = in0[3] >> 16 | in1[0] << 16; - out0[2] = in1[0] >> 16 | in1[1] << 16; - out0[3] = in1[1] >> 16 | in1[2] << 16; - out1[0] = in1[2] >> 16 | in1[3] << 16; - out1[1] = in1[3] >> 16; - out1[2] = 0; - out1[3] = 0; - break; - case 11: out0[0] = in0[2] >> 24 | in0[3] << 8; - out0[1] = in0[3] >> 24 | in1[0] << 8; - out0[2] = in1[0] >> 24 | in1[1] << 8; - out0[3] = in1[1] >> 24 | in1[2] << 8; - out1[0] = in1[2] >> 24 | in1[3] << 8; - out1[1] = in1[3] >> 24; - out1[2] = 0; - out1[3] = 0; - break; - case 12: out0[0] = in0[3]; - out0[1] = in1[0]; - out0[2] = in1[1]; - out0[3] = in1[2]; - out1[0] = in1[3]; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 13: - out0[0] = in0[3] >> 8 | in1[0] << 24; - out0[1] = in1[0] >> 8 | in1[1] << 24; - out0[2] = in1[1] >> 8 | in1[2] << 24; - out0[3] = in1[2] >> 8 | in1[3] << 24; - out1[0] = in1[3] >> 8; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 14: out0[0] = in0[3] >> 16 | in1[0] << 16; - out0[1] = in1[0] >> 16 | in1[1] << 16; - out0[2] = in1[1] >> 16 | in1[2] << 16; - out0[3] = in1[2] >> 16 | in1[3] << 16; - out1[0] = in1[3] >> 16; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out1[1] = in1[1]; + out1[0] = in1[0]; + out0[3] = in0[3]; + out0[2] = in0[2]; + out0[1] = in0[1]; + out0[0] = in0[0]; break; - case 15: out0[0] = in0[3] >> 24 | in1[0] << 8; - out0[1] = in1[0] >> 24 | in1[1] << 8; - out0[2] = in1[1] >> 24 | in1[2] << 8; - out0[3] = in1[2] >> 24 | in1[3] << 8; - out1[0] = in1[3] >> 24; - out1[1] = 0; - out1[2] = 0; - 
out1[3] = 0; + case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3); + out1[2] = amd_bytealign (in1[2], in1[1], 3); + out1[1] = amd_bytealign (in1[1], in1[0], 3); + out1[0] = amd_bytealign (in1[0], in0[3], 3); + out0[3] = amd_bytealign (in0[3], in0[2], 3); + out0[2] = amd_bytealign (in0[2], in0[1], 3); + out0[1] = amd_bytealign (in0[1], in0[0], 3); + out0[0] = amd_bytealign (in0[0], 0, 3); + break; + case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2); + out1[2] = amd_bytealign (in1[2], in1[1], 2); + out1[1] = amd_bytealign (in1[1], in1[0], 2); + out1[0] = amd_bytealign (in1[0], in0[3], 2); + out0[3] = amd_bytealign (in0[3], in0[2], 2); + out0[2] = amd_bytealign (in0[2], in0[1], 2); + out0[1] = amd_bytealign (in0[1], in0[0], 2); + out0[0] = amd_bytealign (in0[0], 0, 2); + break; + case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1); + out1[2] = amd_bytealign (in1[2], in1[1], 1); + out1[1] = amd_bytealign (in1[1], in1[0], 1); + out1[0] = amd_bytealign (in1[0], in0[3], 1); + out0[3] = amd_bytealign (in0[3], in0[2], 1); + out0[2] = amd_bytealign (in0[2], in0[1], 1); + out0[1] = amd_bytealign (in0[1], in0[0], 1); + out0[0] = amd_bytealign (in0[0], 0, 1); break; - case 16: out0[0] = in1[0]; - out0[1] = in1[1]; - out0[2] = in1[2]; - out0[3] = in1[3]; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 4: out1[3] = in1[2]; + out1[2] = in1[1]; + out1[1] = in1[0]; + out1[0] = in0[3]; + out0[3] = in0[2]; + out0[2] = in0[1]; + out0[1] = in0[0]; + out0[0] = 0; break; - case 17: out0[0] = in1[0] >> 8 | in1[1] << 24; - out0[1] = in1[1] >> 8 | in1[2] << 24; - out0[2] = in1[2] >> 8 | in1[3] << 24; - out0[3] = in1[3] >> 8; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3); + out1[2] = amd_bytealign (in1[1], in1[0], 3); + out1[1] = amd_bytealign (in1[0], in0[3], 3); + out1[0] = amd_bytealign (in0[3], in0[2], 3); + out0[3] = amd_bytealign (in0[2], in0[1], 3); + out0[2] = amd_bytealign (in0[1], in0[0], 3); + 
out0[1] = amd_bytealign (in0[0], 0, 3); + out0[0] = 0; break; - case 18: out0[0] = in1[0] >> 16 | in1[1] << 16; - out0[1] = in1[1] >> 16 | in1[2] << 16; - out0[2] = in1[2] >> 16 | in1[3] << 16; - out0[3] = in1[3] >> 16; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2); + out1[2] = amd_bytealign (in1[1], in1[0], 2); + out1[1] = amd_bytealign (in1[0], in0[3], 2); + out1[0] = amd_bytealign (in0[3], in0[2], 2); + out0[3] = amd_bytealign (in0[2], in0[1], 2); + out0[2] = amd_bytealign (in0[1], in0[0], 2); + out0[1] = amd_bytealign (in0[0], 0, 2); + out0[0] = 0; break; - case 19: out0[0] = in1[0] >> 24 | in1[1] << 8; - out0[1] = in1[1] >> 24 | in1[2] << 8; - out0[2] = in1[2] >> 24 | in1[3] << 8; - out0[3] = in1[3] >> 24; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1); + out1[2] = amd_bytealign (in1[1], in1[0], 1); + out1[1] = amd_bytealign (in1[0], in0[3], 1); + out1[0] = amd_bytealign (in0[3], in0[2], 1); + out0[3] = amd_bytealign (in0[2], in0[1], 1); + out0[2] = amd_bytealign (in0[1], in0[0], 1); + out0[1] = amd_bytealign (in0[0], 0, 1); + out0[0] = 0; break; - case 20: out0[0] = in1[1]; - out0[1] = in1[2]; - out0[2] = in1[3]; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 8: out1[3] = in1[1]; + out1[2] = in1[0]; + out1[1] = in0[3]; + out1[0] = in0[2]; + out0[3] = in0[1]; + out0[2] = in0[0]; + out0[1] = 0; + out0[0] = 0; break; - case 21: out0[0] = in1[1] >> 8 | in1[2] << 24; - out0[1] = in1[2] >> 8 | in1[3] << 24; - out0[2] = in1[3] >> 8; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3); + out1[2] = amd_bytealign (in1[0], in0[3], 3); + out1[1] = amd_bytealign (in0[3], in0[2], 3); + out1[0] = amd_bytealign (in0[2], in0[1], 3); + out0[3] = amd_bytealign (in0[1], in0[0], 3); + out0[2] = amd_bytealign (in0[0], 0, 3); + out0[1] = 
0; + out0[0] = 0; break; - case 22: out0[0] = in1[1] >> 16 | in1[2] << 16; - out0[1] = in1[2] >> 16 | in1[3] << 16; - out0[2] = in1[3] >> 16; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2); + out1[2] = amd_bytealign (in1[0], in0[3], 2); + out1[1] = amd_bytealign (in0[3], in0[2], 2); + out1[0] = amd_bytealign (in0[2], in0[1], 2); + out0[3] = amd_bytealign (in0[1], in0[0], 2); + out0[2] = amd_bytealign (in0[0], 0, 2); + out0[1] = 0; + out0[0] = 0; break; - case 23: out0[0] = in1[1] >> 24 | in1[2] << 8; - out0[1] = in1[2] >> 24 | in1[3] << 8; - out0[2] = in1[3] >> 24; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1); + out1[2] = amd_bytealign (in1[0], in0[3], 1); + out1[1] = amd_bytealign (in0[3], in0[2], 1); + out1[0] = amd_bytealign (in0[2], in0[1], 1); + out0[3] = amd_bytealign (in0[1], in0[0], 1); + out0[2] = amd_bytealign (in0[0], 0, 1); + out0[1] = 0; + out0[0] = 0; break; - case 24: out0[0] = in1[2]; - out0[1] = in1[3]; + case 12: out1[3] = in1[0]; + out1[2] = in0[3]; + out1[1] = in0[2]; + out1[0] = in0[1]; + out0[3] = in0[0]; out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 25: out0[0] = in1[2] >> 8 | in1[3] << 24; - out0[1] = in1[3] >> 8; + case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3); + out1[2] = amd_bytealign (in0[3], in0[2], 3); + out1[1] = amd_bytealign (in0[2], in0[1], 3); + out1[0] = amd_bytealign (in0[1], in0[0], 3); + out0[3] = amd_bytealign (in0[0], 0, 3); out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 26: out0[0] = in1[2] >> 16 | in1[3] << 16; - out0[1] = in1[3] >> 16; + case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2); + out1[2] = amd_bytealign (in0[3], in0[2], 2); + out1[1] = amd_bytealign (in0[2], in0[1], 2); 
+ out1[0] = amd_bytealign (in0[1], in0[0], 2); + out0[3] = amd_bytealign (in0[0], 0, 2); out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 27: out0[0] = in1[2] >> 24 | in1[3] << 8; - out0[1] = in1[3] >> 24; + case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1); + out1[2] = amd_bytealign (in0[3], in0[2], 1); + out1[1] = amd_bytealign (in0[2], in0[1], 1); + out1[0] = amd_bytealign (in0[1], in0[0], 1); + out0[3] = amd_bytealign (in0[0], 0, 1); out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 28: out0[0] = in1[3]; out0[1] = 0; - out0[2] = 0; + out0[0] = 0; + break; + case 16: out1[3] = in0[3]; + out1[2] = in0[2]; + out1[1] = in0[1]; + out1[0] = in0[0]; out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 29: out0[0] = in1[3] >> 8; + case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3); + out1[2] = amd_bytealign (in0[2], in0[1], 3); + out1[1] = amd_bytealign (in0[1], in0[0], 3); + out1[0] = amd_bytealign (in0[0], 0, 3); + out0[3] = 0; + out0[2] = 0; out0[1] = 0; + out0[0] = 0; + break; + case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2); + out1[2] = amd_bytealign (in0[2], in0[1], 2); + out1[1] = amd_bytealign (in0[1], in0[0], 2); + out1[0] = amd_bytealign (in0[0], 0, 2); + out0[3] = 0; out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1); + out1[2] = amd_bytealign (in0[2], in0[1], 1); + out1[1] = amd_bytealign (in0[1], in0[0], 1); + out1[0] = amd_bytealign (in0[0], 0, 1); out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 20: out1[3] = in0[2]; + out1[2] = in0[1]; + out1[1] = in0[0]; out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 30: out0[0] = in1[3] >> 16; + case 21: out1[3] = amd_bytealign 
(in0[2], in0[1], 3); + out1[2] = amd_bytealign (in0[1], in0[0], 3); + out1[1] = amd_bytealign (in0[0], 0, 3); + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; out0[1] = 0; + out0[0] = 0; + break; + case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2); + out1[2] = amd_bytealign (in0[1], in0[0], 2); + out1[1] = amd_bytealign (in0[0], 0, 2); + out1[0] = 0; + out0[3] = 0; out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1); + out1[2] = amd_bytealign (in0[1], in0[0], 1); + out1[1] = amd_bytealign (in0[0], 0, 1); + out1[0] = 0; out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 24: out1[3] = in0[1]; + out1[2] = in0[0]; + out1[1] = 0; out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3); + out1[2] = amd_bytealign (in0[0], 0, 3); out1[1] = 0; - out1[2] = 0; - out1[3] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; break; - case 31: out0[0] = in1[3] >> 24; + case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2); + out1[2] = amd_bytealign (in0[0], 0, 2); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; out0[1] = 0; + out0[0] = 0; + break; + case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1); + out1[2] = amd_bytealign (in0[0], 0, 1); + out1[1] = 0; + out1[0] = 0; + out0[3] = 0; out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 28: out1[3] = in0[0]; + out1[2] = 0; + out1[1] = 0; + out1[0] = 0; out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 29: out1[3] = amd_bytealign (in0[0], 0, 3); + out1[2] = 0; + out1[1] = 0; out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 30: out1[3] = amd_bytealign (in0[0], 0, 2); + out1[2] = 0; out1[1] = 0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; + break; + case 31: out1[3] = amd_bytealign (in0[0], 0, 1); out1[2] = 0; - out1[3] = 0; + out1[1] = 
0; + out1[0] = 0; + out0[3] = 0; + out0[2] = 0; + out0[1] = 0; + out0[0] = 0; break; } - #endif } -__device__ static void append_block1 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_r0) +static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0) { - u32x tmp[2]; + u32 tmp[2]; switch (offset & 3) { @@ -1410,10 +1401,9 @@ __device__ static void append_block1 (const u32 offset, u32x dst0[4], u32x dst1[ } } -__device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_l0[4], const u32x src_l1[4], const u32x src_r0[4], const u32x src_r1[4]) +static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4]) { - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV switch (offset) { case 0: @@ -1656,9 +1646,9 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210); break; } + #endif - #else - + #ifdef IS_AMD switch (offset) { case 0: @@ -1673,36 +1663,39 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; + dst0[0] = src_l0[0] + | src_r0[0] << 8; + dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3); + dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3); + dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3); + dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3); + dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3); + dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3); break; case 2: - 
dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; + dst0[0] = src_l0[0] + | src_r0[0] << 16; + dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2); + dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2); + dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2); + dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2); + dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2); + dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2); break; case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; + dst0[0] = src_l0[0] + | src_r0[0] << 24; + dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1); + dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1); + dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1); + dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1); + dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1); + dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1); break; case 4: @@ -1716,33 +1709,36 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[3] = 
src_r1[1] >> 24 | src_r1[2] << 8; + dst0[1] = src_l0[1] + | src_r0[0] << 8; + dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3); + dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3); + dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3); + dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3); + dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3); break; case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; + dst0[1] = src_l0[1] + | src_r0[0] << 16; + dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2); + dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2); + dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2); + dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2); + dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2); break; case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; + dst0[1] = src_l0[1] + | src_r0[0] << 24; + dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1); + dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1); + dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1); + dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1); + dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1); break; case 8: @@ -1755,30 +1751,33 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | 
src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; + dst0[2] = src_l0[2] + | src_r0[0] << 8; + dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3); + dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3); + dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3); + dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3); break; case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; + dst0[2] = src_l0[2] + | src_r0[0] << 16; + dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2); + dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2); + dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2); + dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2); break; case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; + dst0[2] = src_l0[2] + | src_r0[0] << 24; + dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1); + dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1); + dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1); + dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1); break; case 12: @@ -1790,27 +1789,30 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 
| src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; + dst0[3] = src_l0[3] + | src_r0[0] << 8; + dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3); + dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3); + dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3); break; case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16; + dst0[3] = src_l0[3] + | src_r0[0] << 16; + dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2); + dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2); + dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2); break; case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; + dst0[3] = src_l0[3] + | src_r0[0] << 24; + dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1); + dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1); + dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1); break; case 16: @@ -1821,24 +1823,27 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; + dst1[0] = src_l1[0] + | src_r0[0] << 8; + dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3); + dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3); break; case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = 
src_r0[2] >> 16 | src_r0[3] << 16; + dst1[0] = src_l1[0] + | src_r0[0] << 16; + dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2); + dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2); break; case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; + dst1[0] = src_l1[0] + | src_r0[0] << 24; + dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1); + dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1); break; case 20: @@ -1848,21 +1853,24 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; + dst1[1] = src_l1[1] + | src_r0[0] << 8; + dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3); break; case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; + dst1[1] = src_l1[1] + | src_r0[0] << 16; + dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2); break; case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; + dst1[1] = src_l1[1] + | src_r0[0] << 24; + dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1); break; case 24: @@ -1871,18 +1879,21 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; + dst1[2] = src_l1[2] + | src_r0[0] << 8; + dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3); break; case 26: - dst1[2] = 
src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; + dst1[2] = src_l1[2] + | src_r0[0] << 16; + dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2); break; case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; + dst1[2] = src_l1[2] + | src_r0[0] << 24; + dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1); break; case 28: @@ -1890,27 +1901,29 @@ __device__ static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[ break; case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; + dst1[3] = src_l1[3] + | src_r0[0] << 8; break; case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; + dst1[3] = src_l1[3] + | src_r0[0] << 16; break; case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; + dst1[3] = src_l1[3] + | src_r0[0] << 24; break; } - #endif } -__device__ static void reverse_block (u32x in0[4], u32x in1[4], u32x out0[4], u32x out1[4], const u32 len) +static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len) { rshift_block_N (in0, in1, out0, out1, 32 - len); - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; tib40[0] = out1[3]; tib40[1] = out1[2]; @@ -1921,17 +1934,17 @@ __device__ static void reverse_block (u32x in0[4], u32x in1[4], u32x out0[4], u3 tib41[2] = out0[1]; tib41[3] = out0[0]; - out0[0] = swap_workaround (tib40[0]); - out0[1] = swap_workaround (tib40[1]); - out0[2] = swap_workaround (tib40[2]); - out0[3] = swap_workaround (tib40[3]); - out1[0] = swap_workaround (tib41[0]); - out1[1] = swap_workaround (tib41[1]); - out1[2] = swap_workaround (tib41[2]); - out1[3] = swap_workaround (tib41[3]); + out0[0] = swap32 (tib40[0]); + out0[1] = swap32 (tib40[1]); + out0[2] = swap32 (tib40[2]); + out0[3] = swap32 (tib40[3]); + out1[0] = swap32 (tib41[0]); + out1[1] = swap32 (tib41[1]); + out1[2] = swap32 (tib41[2]); + out1[3] = swap32 (tib41[3]); } -__device__ static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 
in_len) +static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { buf0[0] |= (generate_cmask (buf0[0])); buf0[1] |= (generate_cmask (buf0[1])); @@ -1945,7 +1958,7 @@ __device__ static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32x buf return in_len; } -__device__ static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { buf0[0] &= ~(generate_cmask (buf0[0])); buf0[1] &= ~(generate_cmask (buf0[1])); @@ -1959,7 +1972,7 @@ __device__ static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32x buf return in_len; } -__device__ static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len); @@ -1968,7 +1981,7 @@ __device__ static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u return in_len; } -__device__ static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { rule_op_mangle_urest (p0, p1, buf0, buf1, in_len); @@ -1977,7 +1990,7 @@ __device__ static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u return in_len; } -__device__ static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { buf0[0] ^= (generate_cmask (buf0[0])); buf0[1] ^= (generate_cmask (buf0[1])); @@ -1991,7 +2004,7 @@ __device__ static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32x buf return in_len; } 
-__device__ static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2012,21 +2025,21 @@ __device__ static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32x return in_len; } -__device__ static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { reverse_block (buf0, buf1, buf0, buf1, in_len); return in_len; } -__device__ static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ((in_len + in_len) >= 32) return (in_len); u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; tib40[0] = buf0[0]; tib40[1] = buf0[1]; @@ -2044,14 +2057,14 @@ __device__ static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32x return out_len; } -__device__ static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (((in_len * p0) + in_len) >= 32) return (in_len); u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; tib40[0] = buf0[0]; tib40[1] = buf0[1]; @@ -2072,14 +2085,14 @@ __device__ static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, return out_len; } -__device__ static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if 
((in_len + in_len) >= 32) return (in_len); u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; reverse_block (buf0, buf1, tib40, tib41, out_len); @@ -2090,7 +2103,7 @@ __device__ static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32x b return out_len; } -__device__ static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ((in_len + 1) >= 32) return (in_len); @@ -2103,7 +2116,7 @@ __device__ static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32x bu return out_len; } -__device__ static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ((in_len + 1) >= 32) return (in_len); @@ -2118,7 +2131,7 @@ __device__ static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32x b return out_len; } -__device__ static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -2126,7 +2139,7 @@ __device__ static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u3 const u32 sh = (in_len1 & 3) * 8; - const u32x tmp = (buf0[0] & 0xff) << sh; + const u32 tmp = (buf0[0] & 0xff) << sh; lshift_block (buf0, buf1, buf0, buf1); @@ -2145,7 +2158,7 @@ __device__ static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u3 return in_len; } -__device__ static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len == 0) 
return (in_len); @@ -2153,7 +2166,7 @@ __device__ static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u const u32 sh = (in_len1 & 3) * 8; - u32x tmp = 0; + u32 tmp = 0; switch (in_len1 / 4) { @@ -2176,7 +2189,7 @@ __device__ static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u return in_len; } -__device__ static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -2187,7 +2200,7 @@ __device__ static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u return in_len1; } -__device__ static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len == 0) return (in_len); @@ -2210,14 +2223,14 @@ __device__ static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u3 return in_len1; } -__device__ static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; lshift_block (buf0, buf1, tib40, tib41); @@ -2285,7 +2298,7 @@ __device__ static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32x return out_len; } -__device__ static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2300,7 +2313,7 @@ __device__ static u32 rule_op_mangle_extract (const u32 p0, const u32 
p1, u32x b return out_len; } -__device__ static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2308,8 +2321,8 @@ __device__ static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32x buf0 u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; tib40[0] = 0; tib40[1] = 0; @@ -2386,7 +2399,7 @@ __device__ static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32x buf0 return out_len; } -__device__ static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -2394,8 +2407,8 @@ __device__ static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32x bu u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; rshift_block (buf0, buf1, tib40, tib41); @@ -2458,7 +2471,7 @@ __device__ static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32x bu return out_len; } -__device__ static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2481,7 +2494,7 @@ __device__ static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32 return in_len; } -__device__ static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -2490,12 +2503,9 @@ __device__ static u32 rule_op_mangle_truncate_at (const u32 p0, 
const u32 p1, u3 return p0; } -__device__ static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { - - - #ifdef VECT_SIZE1 - + #ifdef IS_NV for (u32 i = 0; i < in_len; i++) { switch (i) @@ -2566,316 +2576,51 @@ __device__ static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32x b break; } } - - #endif - - #ifdef VECT_SIZE2 - - for (u32 i = 0; i < in_len; i++) - { - switch (i) - { - case 0: if ((__byte_perm (buf0[0].x, 0, 0x6540)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7650); - if ((__byte_perm (buf0[0].y, 0, 0x6540)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7650); - break; - case 1: if ((__byte_perm (buf0[0].x, 0, 0x6541)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7604); - if ((__byte_perm (buf0[0].y, 0, 0x6541)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7604); - break; - case 2: if ((__byte_perm (buf0[0].x, 0, 0x6542)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7054); - if ((__byte_perm (buf0[0].y, 0, 0x6542)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7054); - break; - case 3: if ((__byte_perm (buf0[0].x, 0, 0x6543)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x0654); - if ((__byte_perm (buf0[0].y, 0, 0x6543)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x0654); - break; - case 4: if ((__byte_perm (buf0[1].x, 0, 0x6540)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7650); - if ((__byte_perm (buf0[1].y, 0, 0x6540)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7650); - break; - case 5: if ((__byte_perm (buf0[1].x, 0, 0x6541)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7604); - if ((__byte_perm (buf0[1].y, 0, 0x6541)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7604); - break; - case 6: if ((__byte_perm (buf0[1].x, 0, 0x6542)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7054); - if ((__byte_perm (buf0[1].y, 0, 0x6542)) == 
p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7054); - break; - case 7: if ((__byte_perm (buf0[1].x, 0, 0x6543)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x0654); - if ((__byte_perm (buf0[1].y, 0, 0x6543)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x0654); - break; - case 8: if ((__byte_perm (buf0[2].x, 0, 0x6540)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7650); - if ((__byte_perm (buf0[2].y, 0, 0x6540)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7650); - break; - case 9: if ((__byte_perm (buf0[2].x, 0, 0x6541)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7604); - if ((__byte_perm (buf0[2].y, 0, 0x6541)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7604); - break; - case 10: if ((__byte_perm (buf0[2].x, 0, 0x6542)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7054); - if ((__byte_perm (buf0[2].y, 0, 0x6542)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7054); - break; - case 11: if ((__byte_perm (buf0[2].x, 0, 0x6543)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x0654); - if ((__byte_perm (buf0[2].y, 0, 0x6543)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x0654); - break; - case 12: if ((__byte_perm (buf0[3].x, 0, 0x6540)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7650); - if ((__byte_perm (buf0[3].y, 0, 0x6540)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7650); - break; - case 13: if ((__byte_perm (buf0[3].x, 0, 0x6541)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7604); - if ((__byte_perm (buf0[3].y, 0, 0x6541)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7604); - break; - case 14: if ((__byte_perm (buf0[3].x, 0, 0x6542)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7054); - if ((__byte_perm (buf0[3].y, 0, 0x6542)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7054); - break; - case 15: if ((__byte_perm (buf0[3].x, 0, 0x6543)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x0654); - if ((__byte_perm (buf0[3].y, 0, 0x6543)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x0654); - 
break; - case 16: if ((__byte_perm (buf1[0].x, 0, 0x6540)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7650); - if ((__byte_perm (buf1[0].y, 0, 0x6540)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7650); - break; - case 17: if ((__byte_perm (buf1[0].x, 0, 0x6541)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7604); - if ((__byte_perm (buf1[0].y, 0, 0x6541)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7604); - break; - case 18: if ((__byte_perm (buf1[0].x, 0, 0x6542)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7054); - if ((__byte_perm (buf1[0].y, 0, 0x6542)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7054); - break; - case 19: if ((__byte_perm (buf1[0].x, 0, 0x6543)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x0654); - if ((__byte_perm (buf1[0].y, 0, 0x6543)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x0654); - break; - case 20: if ((__byte_perm (buf1[1].x, 0, 0x6540)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x7650); - if ((__byte_perm (buf1[1].y, 0, 0x6540)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7650); - break; - case 21: if ((__byte_perm (buf1[1].x, 0, 0x6541)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x7604); - if ((__byte_perm (buf1[1].y, 0, 0x6541)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7604); - break; - case 22: if ((__byte_perm (buf1[1].x, 0, 0x6542)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x7054); - if ((__byte_perm (buf1[1].y, 0, 0x6542)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7054); - break; - case 23: if ((__byte_perm (buf1[1].x, 0, 0x6543)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x0654); - if ((__byte_perm (buf1[1].y, 0, 0x6543)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x0654); - break; - case 24: if ((__byte_perm (buf1[2].x, 0, 0x6540)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7650); - if ((__byte_perm (buf1[2].y, 0, 0x6540)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7650); - break; - case 25: if ((__byte_perm (buf1[2].x, 0, 
0x6541)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7604); - if ((__byte_perm (buf1[2].y, 0, 0x6541)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7604); - break; - case 26: if ((__byte_perm (buf1[2].x, 0, 0x6542)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7054); - if ((__byte_perm (buf1[2].y, 0, 0x6542)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7054); - break; - case 27: if ((__byte_perm (buf1[2].x, 0, 0x6543)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x0654); - if ((__byte_perm (buf1[2].y, 0, 0x6543)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x0654); - break; - case 28: if ((__byte_perm (buf1[3].x, 0, 0x6540)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7650); - if ((__byte_perm (buf1[3].y, 0, 0x6540)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7650); - break; - case 29: if ((__byte_perm (buf1[3].x, 0, 0x6541)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7604); - if ((__byte_perm (buf1[3].y, 0, 0x6541)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7604); - break; - case 30: if ((__byte_perm (buf1[3].x, 0, 0x6542)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7054); - if ((__byte_perm (buf1[3].y, 0, 0x6542)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7054); - break; - case 31: if ((__byte_perm (buf1[3].x, 0, 0x6543)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x0654); - if ((__byte_perm (buf1[3].y, 0, 0x6543)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x0654); - break; - } - } - #endif - #ifdef VECT_SIZE4 + #ifdef IS_AMD + const uchar4 tmp0 = (uchar4) (p0); + const uchar4 tmp1 = (uchar4) (p1); - for (u32 i = 0; i < in_len; i++) - { - switch (i) - { - case 0: if ((__byte_perm (buf0[0].x, 0, 0x6540)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7650); - if ((__byte_perm (buf0[0].y, 0, 0x6540)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7650); - if ((__byte_perm (buf0[0].z, 0, 0x6540)) == p0) buf0[0].z = __byte_perm (p1, buf0[0].z, 0x7650); - if ((__byte_perm (buf0[0].w, 0, 0x6540)) 
== p0) buf0[0].w = __byte_perm (p1, buf0[0].w, 0x7650); - break; - case 1: if ((__byte_perm (buf0[0].x, 0, 0x6541)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7604); - if ((__byte_perm (buf0[0].y, 0, 0x6541)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7604); - if ((__byte_perm (buf0[0].z, 0, 0x6541)) == p0) buf0[0].z = __byte_perm (p1, buf0[0].z, 0x7604); - if ((__byte_perm (buf0[0].w, 0, 0x6541)) == p0) buf0[0].w = __byte_perm (p1, buf0[0].w, 0x7604); - break; - case 2: if ((__byte_perm (buf0[0].x, 0, 0x6542)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x7054); - if ((__byte_perm (buf0[0].y, 0, 0x6542)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x7054); - if ((__byte_perm (buf0[0].z, 0, 0x6542)) == p0) buf0[0].z = __byte_perm (p1, buf0[0].z, 0x7054); - if ((__byte_perm (buf0[0].w, 0, 0x6542)) == p0) buf0[0].w = __byte_perm (p1, buf0[0].w, 0x7054); - break; - case 3: if ((__byte_perm (buf0[0].x, 0, 0x6543)) == p0) buf0[0].x = __byte_perm (p1, buf0[0].x, 0x0654); - if ((__byte_perm (buf0[0].y, 0, 0x6543)) == p0) buf0[0].y = __byte_perm (p1, buf0[0].y, 0x0654); - if ((__byte_perm (buf0[0].z, 0, 0x6543)) == p0) buf0[0].z = __byte_perm (p1, buf0[0].z, 0x0654); - if ((__byte_perm (buf0[0].w, 0, 0x6543)) == p0) buf0[0].w = __byte_perm (p1, buf0[0].w, 0x0654); - break; - case 4: if ((__byte_perm (buf0[1].x, 0, 0x6540)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7650); - if ((__byte_perm (buf0[1].y, 0, 0x6540)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7650); - if ((__byte_perm (buf0[1].z, 0, 0x6540)) == p0) buf0[1].z = __byte_perm (p1, buf0[1].z, 0x7650); - if ((__byte_perm (buf0[1].w, 0, 0x6540)) == p0) buf0[1].w = __byte_perm (p1, buf0[1].w, 0x7650); - break; - case 5: if ((__byte_perm (buf0[1].x, 0, 0x6541)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7604); - if ((__byte_perm (buf0[1].y, 0, 0x6541)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7604); - if ((__byte_perm (buf0[1].z, 0, 0x6541)) == p0) buf0[1].z = __byte_perm 
(p1, buf0[1].z, 0x7604); - if ((__byte_perm (buf0[1].w, 0, 0x6541)) == p0) buf0[1].w = __byte_perm (p1, buf0[1].w, 0x7604); - break; - case 6: if ((__byte_perm (buf0[1].x, 0, 0x6542)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x7054); - if ((__byte_perm (buf0[1].y, 0, 0x6542)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x7054); - if ((__byte_perm (buf0[1].z, 0, 0x6542)) == p0) buf0[1].z = __byte_perm (p1, buf0[1].z, 0x7054); - if ((__byte_perm (buf0[1].w, 0, 0x6542)) == p0) buf0[1].w = __byte_perm (p1, buf0[1].w, 0x7054); - break; - case 7: if ((__byte_perm (buf0[1].x, 0, 0x6543)) == p0) buf0[1].x = __byte_perm (p1, buf0[1].x, 0x0654); - if ((__byte_perm (buf0[1].y, 0, 0x6543)) == p0) buf0[1].y = __byte_perm (p1, buf0[1].y, 0x0654); - if ((__byte_perm (buf0[1].z, 0, 0x6543)) == p0) buf0[1].z = __byte_perm (p1, buf0[1].z, 0x0654); - if ((__byte_perm (buf0[1].w, 0, 0x6543)) == p0) buf0[1].w = __byte_perm (p1, buf0[1].w, 0x0654); - break; - case 8: if ((__byte_perm (buf0[2].x, 0, 0x6540)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7650); - if ((__byte_perm (buf0[2].y, 0, 0x6540)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7650); - if ((__byte_perm (buf0[2].z, 0, 0x6540)) == p0) buf0[2].z = __byte_perm (p1, buf0[2].z, 0x7650); - if ((__byte_perm (buf0[2].w, 0, 0x6540)) == p0) buf0[2].w = __byte_perm (p1, buf0[2].w, 0x7650); - break; - case 9: if ((__byte_perm (buf0[2].x, 0, 0x6541)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7604); - if ((__byte_perm (buf0[2].y, 0, 0x6541)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7604); - if ((__byte_perm (buf0[2].z, 0, 0x6541)) == p0) buf0[2].z = __byte_perm (p1, buf0[2].z, 0x7604); - if ((__byte_perm (buf0[2].w, 0, 0x6541)) == p0) buf0[2].w = __byte_perm (p1, buf0[2].w, 0x7604); - break; - case 10: if ((__byte_perm (buf0[2].x, 0, 0x6542)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x7054); - if ((__byte_perm (buf0[2].y, 0, 0x6542)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x7054); - if 
((__byte_perm (buf0[2].z, 0, 0x6542)) == p0) buf0[2].z = __byte_perm (p1, buf0[2].z, 0x7054); - if ((__byte_perm (buf0[2].w, 0, 0x6542)) == p0) buf0[2].w = __byte_perm (p1, buf0[2].w, 0x7054); - break; - case 11: if ((__byte_perm (buf0[2].x, 0, 0x6543)) == p0) buf0[2].x = __byte_perm (p1, buf0[2].x, 0x0654); - if ((__byte_perm (buf0[2].y, 0, 0x6543)) == p0) buf0[2].y = __byte_perm (p1, buf0[2].y, 0x0654); - if ((__byte_perm (buf0[2].z, 0, 0x6543)) == p0) buf0[2].z = __byte_perm (p1, buf0[2].z, 0x0654); - if ((__byte_perm (buf0[2].w, 0, 0x6543)) == p0) buf0[2].w = __byte_perm (p1, buf0[2].w, 0x0654); - break; - case 12: if ((__byte_perm (buf0[3].x, 0, 0x6540)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7650); - if ((__byte_perm (buf0[3].y, 0, 0x6540)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7650); - if ((__byte_perm (buf0[3].z, 0, 0x6540)) == p0) buf0[3].z = __byte_perm (p1, buf0[3].z, 0x7650); - if ((__byte_perm (buf0[3].w, 0, 0x6540)) == p0) buf0[3].w = __byte_perm (p1, buf0[3].w, 0x7650); - break; - case 13: if ((__byte_perm (buf0[3].x, 0, 0x6541)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7604); - if ((__byte_perm (buf0[3].y, 0, 0x6541)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7604); - if ((__byte_perm (buf0[3].z, 0, 0x6541)) == p0) buf0[3].z = __byte_perm (p1, buf0[3].z, 0x7604); - if ((__byte_perm (buf0[3].w, 0, 0x6541)) == p0) buf0[3].w = __byte_perm (p1, buf0[3].w, 0x7604); - break; - case 14: if ((__byte_perm (buf0[3].x, 0, 0x6542)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x7054); - if ((__byte_perm (buf0[3].y, 0, 0x6542)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x7054); - if ((__byte_perm (buf0[3].z, 0, 0x6542)) == p0) buf0[3].z = __byte_perm (p1, buf0[3].z, 0x7054); - if ((__byte_perm (buf0[3].w, 0, 0x6542)) == p0) buf0[3].w = __byte_perm (p1, buf0[3].w, 0x7054); - break; - case 15: if ((__byte_perm (buf0[3].x, 0, 0x6543)) == p0) buf0[3].x = __byte_perm (p1, buf0[3].x, 0x0654); - if ((__byte_perm (buf0[3].y, 0, 
0x6543)) == p0) buf0[3].y = __byte_perm (p1, buf0[3].y, 0x0654); - if ((__byte_perm (buf0[3].z, 0, 0x6543)) == p0) buf0[3].z = __byte_perm (p1, buf0[3].z, 0x0654); - if ((__byte_perm (buf0[3].w, 0, 0x6543)) == p0) buf0[3].w = __byte_perm (p1, buf0[3].w, 0x0654); - break; - case 16: if ((__byte_perm (buf1[0].x, 0, 0x6540)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7650); - if ((__byte_perm (buf1[0].y, 0, 0x6540)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7650); - if ((__byte_perm (buf1[0].z, 0, 0x6540)) == p0) buf1[0].z = __byte_perm (p1, buf1[0].z, 0x7650); - if ((__byte_perm (buf1[0].w, 0, 0x6540)) == p0) buf1[0].w = __byte_perm (p1, buf1[0].w, 0x7650); - break; - case 17: if ((__byte_perm (buf1[0].x, 0, 0x6541)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7604); - if ((__byte_perm (buf1[0].y, 0, 0x6541)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7604); - if ((__byte_perm (buf1[0].z, 0, 0x6541)) == p0) buf1[0].z = __byte_perm (p1, buf1[0].z, 0x7604); - if ((__byte_perm (buf1[0].w, 0, 0x6541)) == p0) buf1[0].w = __byte_perm (p1, buf1[0].w, 0x7604); - break; - case 18: if ((__byte_perm (buf1[0].x, 0, 0x6542)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x7054); - if ((__byte_perm (buf1[0].y, 0, 0x6542)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x7054); - if ((__byte_perm (buf1[0].z, 0, 0x6542)) == p0) buf1[0].z = __byte_perm (p1, buf1[0].z, 0x7054); - if ((__byte_perm (buf1[0].w, 0, 0x6542)) == p0) buf1[0].w = __byte_perm (p1, buf1[0].w, 0x7054); - break; - case 19: if ((__byte_perm (buf1[0].x, 0, 0x6543)) == p0) buf1[0].x = __byte_perm (p1, buf1[0].x, 0x0654); - if ((__byte_perm (buf1[0].y, 0, 0x6543)) == p0) buf1[0].y = __byte_perm (p1, buf1[0].y, 0x0654); - if ((__byte_perm (buf1[0].z, 0, 0x6543)) == p0) buf1[0].z = __byte_perm (p1, buf1[0].z, 0x0654); - if ((__byte_perm (buf1[0].w, 0, 0x6543)) == p0) buf1[0].w = __byte_perm (p1, buf1[0].w, 0x0654); - break; - case 20: if ((__byte_perm (buf1[1].x, 0, 0x6540)) == p0) buf1[1].x = 
__byte_perm (p1, buf1[1].x, 0x7650); - if ((__byte_perm (buf1[1].y, 0, 0x6540)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7650); - if ((__byte_perm (buf1[1].z, 0, 0x6540)) == p0) buf1[1].z = __byte_perm (p1, buf1[1].z, 0x7650); - if ((__byte_perm (buf1[1].w, 0, 0x6540)) == p0) buf1[1].w = __byte_perm (p1, buf1[1].w, 0x7650); - break; - case 21: if ((__byte_perm (buf1[1].x, 0, 0x6541)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x7604); - if ((__byte_perm (buf1[1].y, 0, 0x6541)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7604); - if ((__byte_perm (buf1[1].z, 0, 0x6541)) == p0) buf1[1].z = __byte_perm (p1, buf1[1].z, 0x7604); - if ((__byte_perm (buf1[1].w, 0, 0x6541)) == p0) buf1[1].w = __byte_perm (p1, buf1[1].w, 0x7604); - break; - case 22: if ((__byte_perm (buf1[1].x, 0, 0x6542)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x7054); - if ((__byte_perm (buf1[1].y, 0, 0x6542)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x7054); - if ((__byte_perm (buf1[1].z, 0, 0x6542)) == p0) buf1[1].z = __byte_perm (p1, buf1[1].z, 0x7054); - if ((__byte_perm (buf1[1].w, 0, 0x6542)) == p0) buf1[1].w = __byte_perm (p1, buf1[1].w, 0x7054); - break; - case 23: if ((__byte_perm (buf1[1].x, 0, 0x6543)) == p0) buf1[1].x = __byte_perm (p1, buf1[1].x, 0x0654); - if ((__byte_perm (buf1[1].y, 0, 0x6543)) == p0) buf1[1].y = __byte_perm (p1, buf1[1].y, 0x0654); - if ((__byte_perm (buf1[1].z, 0, 0x6543)) == p0) buf1[1].z = __byte_perm (p1, buf1[1].z, 0x0654); - if ((__byte_perm (buf1[1].w, 0, 0x6543)) == p0) buf1[1].w = __byte_perm (p1, buf1[1].w, 0x0654); - break; - case 24: if ((__byte_perm (buf1[2].x, 0, 0x6540)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7650); - if ((__byte_perm (buf1[2].y, 0, 0x6540)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7650); - if ((__byte_perm (buf1[2].z, 0, 0x6540)) == p0) buf1[2].z = __byte_perm (p1, buf1[2].z, 0x7650); - if ((__byte_perm (buf1[2].w, 0, 0x6540)) == p0) buf1[2].w = __byte_perm (p1, buf1[2].w, 0x7650); - break; 
- case 25: if ((__byte_perm (buf1[2].x, 0, 0x6541)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7604); - if ((__byte_perm (buf1[2].y, 0, 0x6541)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7604); - if ((__byte_perm (buf1[2].z, 0, 0x6541)) == p0) buf1[2].z = __byte_perm (p1, buf1[2].z, 0x7604); - if ((__byte_perm (buf1[2].w, 0, 0x6541)) == p0) buf1[2].w = __byte_perm (p1, buf1[2].w, 0x7604); - break; - case 26: if ((__byte_perm (buf1[2].x, 0, 0x6542)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x7054); - if ((__byte_perm (buf1[2].y, 0, 0x6542)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x7054); - if ((__byte_perm (buf1[2].z, 0, 0x6542)) == p0) buf1[2].z = __byte_perm (p1, buf1[2].z, 0x7054); - if ((__byte_perm (buf1[2].w, 0, 0x6542)) == p0) buf1[2].w = __byte_perm (p1, buf1[2].w, 0x7054); - break; - case 27: if ((__byte_perm (buf1[2].x, 0, 0x6543)) == p0) buf1[2].x = __byte_perm (p1, buf1[2].x, 0x0654); - if ((__byte_perm (buf1[2].y, 0, 0x6543)) == p0) buf1[2].y = __byte_perm (p1, buf1[2].y, 0x0654); - if ((__byte_perm (buf1[2].z, 0, 0x6543)) == p0) buf1[2].z = __byte_perm (p1, buf1[2].z, 0x0654); - if ((__byte_perm (buf1[2].w, 0, 0x6543)) == p0) buf1[2].w = __byte_perm (p1, buf1[2].w, 0x0654); - break; - case 28: if ((__byte_perm (buf1[3].x, 0, 0x6540)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7650); - if ((__byte_perm (buf1[3].y, 0, 0x6540)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7650); - if ((__byte_perm (buf1[3].z, 0, 0x6540)) == p0) buf1[3].z = __byte_perm (p1, buf1[3].z, 0x7650); - if ((__byte_perm (buf1[3].w, 0, 0x6540)) == p0) buf1[3].w = __byte_perm (p1, buf1[3].w, 0x7650); - break; - case 29: if ((__byte_perm (buf1[3].x, 0, 0x6541)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7604); - if ((__byte_perm (buf1[3].y, 0, 0x6541)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7604); - if ((__byte_perm (buf1[3].z, 0, 0x6541)) == p0) buf1[3].z = __byte_perm (p1, buf1[3].z, 0x7604); - if ((__byte_perm (buf1[3].w, 0, 
0x6541)) == p0) buf1[3].w = __byte_perm (p1, buf1[3].w, 0x7604); - break; - case 30: if ((__byte_perm (buf1[3].x, 0, 0x6542)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x7054); - if ((__byte_perm (buf1[3].y, 0, 0x6542)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x7054); - if ((__byte_perm (buf1[3].z, 0, 0x6542)) == p0) buf1[3].z = __byte_perm (p1, buf1[3].z, 0x7054); - if ((__byte_perm (buf1[3].w, 0, 0x6542)) == p0) buf1[3].w = __byte_perm (p1, buf1[3].w, 0x7054); - break; - case 31: if ((__byte_perm (buf1[3].x, 0, 0x6543)) == p0) buf1[3].x = __byte_perm (p1, buf1[3].x, 0x0654); - if ((__byte_perm (buf1[3].y, 0, 0x6543)) == p0) buf1[3].y = __byte_perm (p1, buf1[3].y, 0x0654); - if ((__byte_perm (buf1[3].z, 0, 0x6543)) == p0) buf1[3].z = __byte_perm (p1, buf1[3].z, 0x0654); - if ((__byte_perm (buf1[3].w, 0, 0x6543)) == p0) buf1[3].w = __byte_perm (p1, buf1[3].w, 0x0654); - break; - } - } + uchar4 tmp; + tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp); + tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp); + tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp); + tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp); + tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp); + tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp); + tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp); + tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp); #endif - return in_len; } -__device__ static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { // TODO return in_len; } -__device__ static u32 
rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { // TODO return in_len; } -__device__ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + p0) >= 32) return (in_len); u32 out_len = in_len; - const u32x tmp = buf0[0] & 0xFF; + const u32 tmp = buf0[0] & 0xFF; rshift_block_N (buf0, buf1, buf0, buf1, p0); - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV switch (p0) { case 1: buf0[0] |= tmp; @@ -3046,9 +2791,9 @@ __device__ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, buf1[3] |= __byte_perm (tmp, 0, 0x4000); break; } + #endif - #else - + #ifdef IS_AMD switch (p0) { case 1: buf0[0] |= tmp << 0; @@ -3219,7 +2964,6 @@ __device__ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16; break; } - #endif out_len += p0; @@ -3227,7 +2971,7 @@ __device__ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, return out_len; } -__device__ static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + p0) >= 32) return (in_len); @@ -3236,7 +2980,7 @@ __device__ static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, const u32 sh = (in_len1 & 3) * 8; - u32x tmp = 0; + u32 tmp = 0; switch (in_len1 / 4) { @@ -3262,18 +3006,17 @@ __device__ static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, return out_len; } -__device__ static u32 
rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ( in_len == 0) return (in_len); if ((in_len + in_len) >= 32) return (in_len); u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; - - #if __CUDA_ARCH__ >= 200 + u32 tib40[4]; + u32 tib41[4]; + #ifdef IS_NV tib40[0] = __byte_perm (buf0[0], 0, 0x1100); tib40[1] = __byte_perm (buf0[0], 0, 0x3322); tib40[2] = __byte_perm (buf0[1], 0, 0x1100); @@ -3291,9 +3034,9 @@ __device__ static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u buf1[1] = tib41[1]; buf1[2] = tib41[2]; buf1[3] = tib41[3]; + #endif - #else - + #ifdef IS_AMD tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8); tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8); tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8); @@ -3311,7 +3054,6 @@ __device__ static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u buf1[1] = tib41[1] | (tib41[1] << 8); buf1[2] = tib41[2] | (tib41[2] << 8); buf1[3] = tib41[3] | (tib41[3] << 8); - #endif out_len = out_len + out_len; @@ -3319,29 +3061,26 @@ __device__ static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u return out_len; } -__device__ static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len < 2) return (in_len); - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV buf0[0] = __byte_perm (buf0[0], 0, 0x3201); + #endif - #else - + #ifdef IS_AMD buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); - #endif return in_len; } -__device__ static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const 
u32 in_len) +static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (in_len < 2) return (in_len); - #if __CUDA_ARCH__ >= 200 - + #ifdef IS_NV switch (in_len) { case 2: buf0[0] = __byte_perm (buf0[0], 0, 0x5401); @@ -3419,9 +3158,9 @@ __device__ static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u3 case 31: buf1[3] = __byte_perm (buf1[3], 0, 0x4120); break; } + #endif - #else - + #ifdef IS_AMD switch (in_len) { case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); @@ -3499,22 +3238,20 @@ __device__ static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u3 case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00); break; } - #endif return in_len; } -__device__ static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); if (p1 >= in_len) return (in_len); - u32x tmp0 = 0; - u32x tmp1 = 0; - - #if __CUDA_ARCH__ >= 200 + u32 tmp0 = 0; + u32 tmp1 = 0; + #ifdef IS_NV switch (p0) { case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540); @@ -3750,9 +3487,9 @@ __device__ static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32x case 31: buf1[3] = __byte_perm (tmp1, buf1[3], 0x0654); break; } + #endif - #else - + #ifdef IS_AMD switch (p0) { case 0: tmp0 = (buf0[0] >> 0) & 0xFF; @@ -3988,13 +3725,12 @@ __device__ static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32x case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24; break; } - #endif return in_len; } -__device__ static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= 
in_len) return (in_len); @@ -4016,7 +3752,7 @@ __device__ static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32 return in_len; } -__device__ static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -4038,7 +3774,7 @@ __device__ static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32 return in_len; } -__device__ static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -4062,7 +3798,7 @@ __device__ static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32x return in_len; } -__device__ static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 >= in_len) return (in_len); @@ -4086,12 +3822,12 @@ __device__ static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32x return in_len; } -__device__ static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if ((p0 + 1) >= in_len) return (in_len); - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; lshift_block (buf0, buf1, tib40, tib41); @@ -4113,14 +3849,14 @@ __device__ static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u3 return in_len; } -__device__ static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_replace_nm1 (const u32 
p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 == 0) return (in_len); if (p0 >= in_len) return (in_len); - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; rshift_block (buf0, buf1, tib40, tib41); @@ -4142,7 +3878,7 @@ __device__ static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u3 return in_len; } -__device__ static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -4150,8 +3886,8 @@ __device__ static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1 u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; tib40[0] = buf0[0]; tib40[1] = buf0[1]; @@ -4180,7 +3916,7 @@ __device__ static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1 return out_len; } -__device__ static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { if (p0 > in_len) return (in_len); @@ -4188,8 +3924,8 @@ __device__ static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32 out_len = in_len; - u32x tib40[4]; - u32x tib41[4]; + u32 tib40[4]; + u32 tib41[4]; rshift_block_N (buf0, buf1, tib40, tib41, p0); @@ -4209,7 +3945,7 @@ __device__ static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, return out_len; } -__device__ static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { buf0[0] |= (generate_cmask (buf0[0])); buf0[1] |= (generate_cmask (buf0[1])); @@ -4220,14 +3956,13 @@ __device__ static u32 rule_op_mangle_title 
(const u32 p0, const u32 p1, u32x buf buf1[2] |= (generate_cmask (buf1[2])); buf1[3] |= (generate_cmask (buf1[3])); + #ifdef IS_NV buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0])); - - for (u32 i = 0; i < in_len; i++) { - u32x tmp0; - u32x tmp1; + u32 tmp0; + u32 tmp1; switch (i) { @@ -4295,7 +4030,6 @@ __device__ static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32x buf tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break; } - #ifdef VECT_SIZE1 if (i < 3) { if (tmp0 == ' ') buf0[0] &= tmp1 ; @@ -4328,117 +4062,44 @@ __device__ static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32x buf { if (tmp0 == ' ') buf1[3] &= tmp1 ; } - #endif - - #ifdef VECT_SIZE2 - if (i < 3) - { - if (tmp0.x == ' ') buf0[0].x &= tmp1.x; - if (tmp0.y == ' ') buf0[0].y &= tmp1.y; - } - else if (i < 7) - { - if (tmp0.x == ' ') buf0[1].x &= tmp1.x; - if (tmp0.y == ' ') buf0[1].y &= tmp1.y; - } - else if (i < 11) - { - if (tmp0.x == ' ') buf0[2].x &= tmp1.x; - if (tmp0.y == ' ') buf0[2].y &= tmp1.y; - } - else if (i < 15) - { - if (tmp0.x == ' ') buf0[3].x &= tmp1.x; - if (tmp0.y == ' ') buf0[3].y &= tmp1.y; - } - else if (i < 19) - { - if (tmp0.x == ' ') buf1[0].x &= tmp1.x; - if (tmp0.y == ' ') buf1[0].y &= tmp1.y; - } - else if (i < 23) - { - if (tmp0.x == ' ') buf1[1].x &= tmp1.x; - if (tmp0.y == ' ') buf1[1].y &= tmp1.y; - } - else if (i < 27) - { - if (tmp0.x == ' ') buf1[2].x &= tmp1.x; - if (tmp0.y == ' ') buf1[2].y &= tmp1.y; - } - else if (i < 31) - { - if (tmp0.x == ' ') buf1[3].x &= tmp1.x; - if (tmp0.y == ' ') buf1[3].y &= tmp1.y; - } - #endif - - #ifdef VECT_SIZE4 - if (i < 3) - { - if (tmp0.x == ' ') buf0[0].x &= tmp1.x; - if (tmp0.y == ' ') buf0[0].y &= tmp1.y; - if (tmp0.z == ' ') buf0[0].z &= tmp1.z; - if (tmp0.w == ' ') buf0[0].w &= tmp1.w; - } - else if (i < 7) - { - if (tmp0.x == ' ') buf0[1].x &= tmp1.x; - if (tmp0.y == ' ') buf0[1].y &= tmp1.y; - if (tmp0.z == ' ') buf0[1].z &= tmp1.z; - if (tmp0.w == ' ') buf0[1].w &= tmp1.w; - } - else if (i 
< 11) - { - if (tmp0.x == ' ') buf0[2].x &= tmp1.x; - if (tmp0.y == ' ') buf0[2].y &= tmp1.y; - if (tmp0.z == ' ') buf0[2].z &= tmp1.z; - if (tmp0.w == ' ') buf0[2].w &= tmp1.w; - } - else if (i < 15) - { - if (tmp0.x == ' ') buf0[3].x &= tmp1.x; - if (tmp0.y == ' ') buf0[3].y &= tmp1.y; - if (tmp0.z == ' ') buf0[3].z &= tmp1.z; - if (tmp0.w == ' ') buf0[3].w &= tmp1.w; - } - else if (i < 19) - { - if (tmp0.x == ' ') buf1[0].x &= tmp1.x; - if (tmp0.y == ' ') buf1[0].y &= tmp1.y; - if (tmp0.z == ' ') buf1[0].z &= tmp1.z; - if (tmp0.w == ' ') buf1[0].w &= tmp1.w; - } - else if (i < 23) - { - if (tmp0.x == ' ') buf1[1].x &= tmp1.x; - if (tmp0.y == ' ') buf1[1].y &= tmp1.y; - if (tmp0.z == ' ') buf1[1].z &= tmp1.z; - if (tmp0.w == ' ') buf1[1].w &= tmp1.w; - } - else if (i < 27) - { - if (tmp0.x == ' ') buf1[2].x &= tmp1.x; - if (tmp0.y == ' ') buf1[2].y &= tmp1.y; - if (tmp0.z == ' ') buf1[2].z &= tmp1.z; - if (tmp0.w == ' ') buf1[2].w &= tmp1.w; - } - else if (i < 31) - { - if (tmp0.x == ' ') buf1[3].x &= tmp1.x; - if (tmp0.y == ' ') buf1[3].y &= tmp1.y; - if (tmp0.z == ' ') buf1[3].z &= tmp1.z; - if (tmp0.w == ' ') buf1[3].w &= tmp1.w; - } - #endif } + #endif - + #ifdef IS_AMD + u32 tib40[4]; + u32 tib41[4]; + + const uchar4 tmp0 = (uchar4) (' '); + const uchar4 tmp1 = (uchar4) (0x00); + const uchar4 tmp2 = (uchar4) (0xff); + + uchar4 tmp; + + tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp); + tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp); + tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp); + tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp); + tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp); + tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp); + tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == 
tmp0); tib41[2] = as_uint (tmp); + tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp); + + rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff; + + buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]); + buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]); + buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]); + buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]); + buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]); + buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]); + buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]); + buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]); + #endif return in_len; } -__device__ static u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) +u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { u32 out_len = in_len; @@ -4489,7 +4150,7 @@ __device__ static u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u3 return out_len; } -__device__ static u32 apply_rules (u32 *cmds, u32x buf0[4], u32x buf1[4], const u32 len) +u32 apply_rules (__global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len) { u32 out_len = len; diff --git a/amd/types_amd.c b/OpenCL/types_ocl.c similarity index 59% rename from amd/types_amd.c rename to OpenCL/types_ocl.c index 8adb2ab..c052384 100644 --- a/amd/types_amd.c +++ b/OpenCL/types_ocl.c @@ -8,63 +8,56 @@ typedef ushort u16; typedef uint u32; typedef ulong u64; -#ifdef VECT_SIZE1 -#define VECT_SHIFT 0 -#define VECT_DIV 1 -typedef uchar u8x; -typedef uint u32x; -typedef ulong u64x; -#endif +#ifdef IS_AMD -#ifdef VECT_SIZE2 -#define VECT_SHIFT 1 -#define VECT_DIV 2 -typedef uchar2 u8x; -typedef uint2 u32x; -typedef ulong2 u64x; - -#define u8x(a,b) (u8x) (a,b) -#define u16x(a,b) (u16x) (a,b) -#define u32x(a,b) (u32x) (a,b) -#define u64x(a,b) (u64x) (a,b) -#endif +static u32 swap32 (const u32 v) +{ + return (as_uint (as_uchar4 (v).s3210)); +} + +static u64 swap64 
(const u64 v) +{ + return (as_ulong (as_uchar8 (v).s76543210)); +} -#ifdef VECT_SIZE4 -#define VECT_SHIFT 2 -#define VECT_DIV 4 -typedef uchar4 u8x; -typedef uint4 u32x; -typedef ulong4 u64x; - -#define u8x(a,b,c,d) (u8x) (a,b,c,d) -#define u16x(a,b,c,d) (u16x) (a,b,c,d) -#define u32x(a,b,c,d) (u32x) (a,b,c,d) -#define u64x(a,b,c,d) (u64x) (a,b,c,d) #endif -static inline bool allx (const u32 r) +#ifdef IS_NV + +static u32 __byte_perm (const u32 a, const u32 b, const u32 c) { + u32 r; + + asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); + return r; } -static inline u32 rotr32 (const u32 a, const u32 n) +static u32 swap32 (const u32 v) { - return amd_bitalign (a, a, n); + return __byte_perm (v, 0, 0x0123); } -static inline u32 rotl32 (const u32 a, const u32 n) +static u64 swap64 (const u64 v) { - return rotate (a, n); + return (as_ulong (as_uchar8 (v).s76543210)); } -static inline u32 l32_from_64 (u64 a) +#endif + +static bool allx (const u32 r) +{ + return r; +} + +static u32 l32_from_64 (u64 a) { const u32 r = (uint) (a); return r; } -static inline u32 h32_from_64 (u64 a) +static u32 h32_from_64 (u64 a) { a >>= 32; @@ -73,12 +66,24 @@ static inline u32 h32_from_64 (u64 a) return r; } -static inline u64 hl32_to_64 (const u32 a, const u32 b) +static u64 hl32_to_64 (const u32 a, const u32 b) { return as_ulong ((uint2) (b, a)); } -static inline u64 rotr64 (const u64 a, const u32 n) +static u32 rotr32 (const u32 a, const u32 n) +{ + return rotate (a, 32 - n); +} + +static u32 rotl32 (const u32 a, const u32 n) +{ + return rotate (a, n); +} + +#ifdef IS_AMD + +static u64 rotr64 (const u64 a, const u32 n) { uint2 a2 = as_uint2 (a); @@ -92,123 +97,54 @@ static inline u64 rotr64 (const u64 a, const u32 n) return as_ulong (t); } -static inline u64 rotl64 (const u64 a, const u32 n) -{ - return rotr64 (a, 64 - n); -} - -#ifdef VECT_SIZE2 -static inline bool allx (const int2 r) -{ - return all (r); -} +#endif -static inline u32x rotl32 (const u32x a, const 
u32 n) -{ - return (u32x) (rotl32 (a.s0, n), - rotl32 (a.s1, n)); -} +#ifdef IS_NV -static inline u32x rotr32 (const u32x a, const u32 n) -{ - return (u32x) (rotr32 (a.s0, n), - rotr32 (a.s1, n)); -} +#if CUDA_ARCH >= 350 -static inline u64x rotl64 (const u64x a, const u32 n) +static u64 rotr64 (const u64 a, const u32 n) { - return (u64x) (rotl64 (a.s0, n), - rotl64 (a.s1, n)); -} + u32 il; + u32 ir; -static inline u64x rotr64 (const u64x a, const u32 n) -{ - return (u64x) (rotr64 (a.s0, n), - rotr64 (a.s1, n)); -} + asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a)); -static inline u32x l32_from_64 (const u64x a) -{ - return (u32x) (l32_from_64 (a.s0), - l32_from_64 (a.s1)); -} + u32 tl; + u32 tr; -static inline u32x h32_from_64 (const u64x a) -{ - return (u32x) (h32_from_64 (a.s0), - h32_from_64 (a.s1)); -} - -static inline u64x hl32_to_64 (const u32x a, const u32x b) -{ - return (u64x) (hl32_to_64 (a.s0, b.s0), - hl32_to_64 (a.s1, b.s1)); -} - - -#endif + if (n >= 32) + { + asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32)); + asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32)); + } + else + { + asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n)); + asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n)); + } -#ifdef VECT_SIZE4 -static inline bool allx (const int4 r) -{ - return all (r); -} + u64 r; -static inline u32x rotl32 (const u32x a, const u32 n) -{ - return (u32x) (rotl32 (a.s0, n), - rotl32 (a.s1, n), - rotl32 (a.s2, n), - rotl32 (a.s3, n)); -} + asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tl), "r"(tr)); -static inline u32x rotr32 (const u32x a, const u32 n) -{ - return (u32x) (rotr32 (a.s0, n), - rotr32 (a.s1, n), - rotr32 (a.s2, n), - rotr32 (a.s3, n)); + return r; } -static inline u64x rotl64 (const u64x a, const u32 n) -{ - return (u64x) (rotl64 (a.s0, n), - rotl64 (a.s1, n), - rotl64 (a.s2, n), - rotl64 (a.s3, n)); -} +#else -static 
inline u64x rotr64 (const u64x a, const u32 n) +static u64 rotr64 (const u64 a, const u32 n) { - return (u64x) (rotr64 (a.s0, n), - rotr64 (a.s1, n), - rotr64 (a.s2, n), - rotr64 (a.s3, n)); + return (((a) >> (n)) | ((a) << (64 - (n)))); } -static inline u32x l32_from_64 (const u64x a) -{ - return (u32x) (l32_from_64 (a.s0), - l32_from_64 (a.s1), - l32_from_64 (a.s2), - l32_from_64 (a.s3)); -} +#endif -static inline u32x h32_from_64 (const u64x a) +static u64 rotl64 (const u64 a, const u32 n) { - return (u32x) (h32_from_64 (a.s0), - h32_from_64 (a.s1), - h32_from_64 (a.s2), - h32_from_64 (a.s3)); + return rotr64 (a, 64 - n); } -static inline u64x hl32_to_64 (const u32x a, const u32x b) -{ - return (u64x) (hl32_to_64 (a.s0, b.s0), - hl32_to_64 (a.s1, b.s1), - hl32_to_64 (a.s2, b.s2), - hl32_to_64 (a.s3, b.s3)); -} #endif typedef struct @@ -519,8 +455,8 @@ typedef struct typedef struct { - u32x digest[4]; - u32x out[4]; + u32 digest[4]; + u32 out[4]; } pdf14_tmp_t; @@ -539,189 +475,189 @@ typedef struct typedef struct { - u32x digest_buf[4]; + u32 digest_buf[4]; } phpass_tmp_t; typedef struct { - u32x digest_buf[4]; + u32 digest_buf[4]; } md5crypt_tmp_t; typedef struct { - u32x alt_result[8]; + u32 alt_result[8]; - u32x p_bytes[4]; - u32x s_bytes[4]; + u32 p_bytes[4]; + u32 s_bytes[4]; } sha256crypt_tmp_t; typedef struct { - u64x l_alt_result[8]; + u64 l_alt_result[8]; - u64x l_p_bytes[2]; - u64x l_s_bytes[2]; + u64 l_p_bytes[2]; + u64 l_s_bytes[2]; } sha512crypt_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[10]; - u32x out[10]; + u32 dgst[10]; + u32 out[10]; } wpa_tmp_t; typedef struct { - u64x dgst[8]; + u64 dgst[8]; } bitcoin_wallet_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[5]; - u32x out[4]; + u32 dgst[5]; + u32 out[4]; } dcc2_tmp_t; typedef struct { - u32x P[18]; + u32 P[18]; - u32x S0[256]; - u32x S1[256]; - u32x S2[256]; - u32x S3[256]; + u32 S0[256]; + 
u32 S1[256]; + u32 S2[256]; + u32 S3[256]; } bcrypt_tmp_t; typedef struct { - u32x digest[2]; + u32 digest[2]; - u32x P[18]; + u32 P[18]; - u32x S0[256]; - u32x S1[256]; - u32x S2[256]; - u32x S3[256]; + u32 S0[256]; + u32 S1[256]; + u32 S2[256]; + u32 S3[256]; } pwsafe2_tmp_t; typedef struct { - u32x digest_buf[8]; + u32 digest_buf[8]; } pwsafe3_tmp_t; typedef struct { - u32x digest_buf[5]; + u32 digest_buf[5]; } androidpin_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[10]; - u32x out[10]; + u32 dgst[10]; + u32 out[10]; } androidfde_tmp_t; typedef struct { - u32x ipad[16]; - u32x opad[16]; + u32 ipad[16]; + u32 opad[16]; - u32x dgst[64]; - u32x out[64]; + u32 dgst[64]; + u32 out[64]; } tc_tmp_t; typedef struct { - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; - u64x dgst[32]; - u64x out[32]; + u64 dgst[32]; + u64 out[32]; } tc64_tmp_t; typedef struct { - u32x ipad[4]; - u32x opad[4]; + u32 ipad[4]; + u32 opad[4]; - u32x dgst[32]; - u32x out[32]; + u32 dgst[32]; + u32 out[32]; } pbkdf2_md5_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[32]; - u32x out[32]; + u32 dgst[32]; + u32 out[32]; } pbkdf2_sha1_tmp_t; typedef struct { - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; - u32x dgst[32]; - u32x out[32]; + u32 dgst[32]; + u32 out[32]; } pbkdf2_sha256_tmp_t; typedef struct { - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; - u64x dgst[16]; - u64x out[16]; + u64 dgst[16]; + u64 out[16]; } pbkdf2_sha512_tmp_t; typedef struct { - u64x out[8]; + u64 out[8]; } ecryptfs_tmp_t; typedef struct { - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; - u64x dgst[16]; - u64x out[16]; + u64 dgst[16]; + u64 out[16]; } oraclet_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; } agilekey_tmp_t; @@ -740,101 +676,101 @@ typedef struct 
typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; } sha1aix_tmp_t; typedef struct { - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; - u32x dgst[8]; - u32x out[8]; + u32 dgst[8]; + u32 out[8]; } sha256aix_tmp_t; typedef struct { - u64x ipad[8]; - u64x opad[8]; + u64 ipad[8]; + u64 opad[8]; - u64x dgst[8]; - u64x out[8]; + u64 dgst[8]; + u64 out[8]; } sha512aix_tmp_t; typedef struct { - u32x ipad[8]; - u32x opad[8]; + u32 ipad[8]; + u32 opad[8]; - u32x dgst[8]; - u32x out[8]; + u32 dgst[8]; + u32 out[8]; } lastpass_tmp_t; typedef struct { - u64x digest_buf[8]; + u64 digest_buf[8]; } drupal7_tmp_t; typedef struct { - u32x ipad[5]; - u32x opad[5]; + u32 ipad[5]; + u32 opad[5]; - u32x dgst[5]; - u32x out[5]; + u32 dgst[5]; + u32 out[5]; } lotus8_tmp_t; typedef struct { - u32x out[5]; + u32 out[5]; } office2007_tmp_t; typedef struct { - u32x out[5]; + u32 out[5]; } office2010_tmp_t; typedef struct { - u64x out[8]; + u64 out[8]; } office2013_tmp_t; typedef struct { - u32x digest_buf[5]; + u32 digest_buf[5]; } saph_sha1_tmp_t; typedef struct { - u32x block[16]; + u32 block[16]; - u32x dgst[8]; + u32 dgst[8]; - u32x block_len; - u32x final_len; + u32 block_len; + u32 final_len; } seven_zip_tmp_t; typedef struct { - u32x Kc[16]; - u32x Kd[16]; + u32 Kc[16]; + u32 Kd[16]; - u32x iv[2]; + u32 iv[2]; } bsdicrypt_tmp_t; @@ -905,23 +841,10 @@ typedef struct typedef struct { - #ifdef _SCALAR_ u32 i[64]; - #else - #ifdef VECT_SIZE4 - u32x i[16]; - #endif - - #ifdef VECT_SIZE2 - u32x i[32]; - #endif - - #ifdef VECT_SIZE1 - u32x i[64]; - #endif - #endif u32 pw_len; + u32 alignment_placeholder_1; u32 alignment_placeholder_2; u32 alignment_placeholder_3; diff --git a/amd/amp_a0_v2.cl b/amd/amp_a0_v2.cl deleted file mode 100644 index 0343f81..0000000 --- a/amd/amp_a0_v2.cl +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE2 
- -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -static u32x swap_workaround (const u32x v) -{ - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); -} - -#include "include/rp_gpu.h" -#include "rp_amd.c" - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - const u32 out_len = apply_rules (rules_buf[0].cmds, w0, w1, pw_len); - - pws_amp[gid].i[0] = w0[0]; - pws_amp[gid].i[1] = w0[1]; - pws_amp[gid].i[2] = w0[2]; - pws_amp[gid].i[3] = w0[3]; - pws_amp[gid].i[4] = w1[0]; - pws_amp[gid].i[5] = w1[1]; - pws_amp[gid].i[6] = w1[2]; - pws_amp[gid].i[7] = w1[3]; - - pws_amp[gid].pw_len = out_len; -} diff --git a/amd/amp_a0_v4.cl b/amd/amp_a0_v4.cl deleted file mode 100644 index d470f34..0000000 --- a/amd/amp_a0_v4.cl +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -static u32x swap_workaround (const u32x v) -{ - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); -} - -#include "include/rp_gpu.h" -#include "rp_amd.c" - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = 
get_global_id (0); - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - const u32 out_len = apply_rules (rules_buf[0].cmds, w0, w1, pw_len); - - pws_amp[gid].i[0] = w0[0]; - pws_amp[gid].i[1] = w0[1]; - pws_amp[gid].i[2] = w0[2]; - pws_amp[gid].i[3] = w0[3]; - pws_amp[gid].i[4] = w1[0]; - pws_amp[gid].i[5] = w1[1]; - pws_amp[gid].i[6] = w1[2]; - pws_amp[gid].i[7] = w1[3]; - - pws_amp[gid].pw_len = out_len; -} diff --git a/amd/amp_a1_v2.cl b/amd/amp_a1_v2.cl deleted file mode 100644 index 0beca7d..0000000 --- a/amd/amp_a1_v2.cl +++ /dev/null @@ -1,587 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE2 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset; - - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4); - w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4); - w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[1] = amd_bytealign (w0[1], w0[0], 
offset_minus_4); - w0[0] = amd_bytealign (w0[0], 0, offset_minus_4); - - if (offset_mod_4 == 0) - { - w0[0] = w0[1]; - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 1: - w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4); - w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 2: - w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4); - w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[1] = amd_bytealign (w0[3], w0[2], 
offset_minus_4); - w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 3: - w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4); - w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 4: - w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4); - w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[1] = amd_bytealign (w0[1], w0[0], 
offset_minus_4); - w1[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 5: - w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4); - w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 6: - w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4); - w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 7: - w3[2] = 
amd_bytealign ( 0, w1[2], offset_minus_4); - w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 8: - w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4); - w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 9: - w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4); - w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 10: - w3[2] = 
amd_bytealign ( 0, w0[3], offset_minus_4); - w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 11: - w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4); - w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 12: - w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4); - w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w3[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 13: - w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4); - w3[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - } -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, 
const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_r_len = combs_buf[0].pw_len; - - u32x wordr0[4]; - - wordr0[0] = combs_buf[0].i[0]; - wordr0[1] = combs_buf[0].i[1]; - wordr0[2] = combs_buf[0].i[2]; - wordr0[3] = combs_buf[0].i[3]; - - u32x wordr1[4]; - - wordr1[0] = combs_buf[0].i[4]; - wordr1[1] = combs_buf[0].i[5]; - wordr1[2] = combs_buf[0].i[6]; - wordr1[3] = combs_buf[0].i[7]; - - u32x wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32x wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, pw_r_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | 
wordr3[3]; - - const u32 pw_len = pw_l_len + pw_r_len; - - pws_amp[gid].i[ 0] = w0[0]; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/amd/amp_a1_v4.cl b/amd/amp_a1_v4.cl deleted file mode 100644 index 801d11f..0000000 --- a/amd/amp_a1_v4.cl +++ /dev/null @@ -1,587 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset; - - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4); - w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4); - w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[0] = amd_bytealign (w0[0], 0, offset_minus_4); - - if (offset_mod_4 == 0) - { - 
w0[0] = w0[1]; - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 1: - w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4); - w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 2: - w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4); - w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[3] = amd_bytealign 
(w0[1], w0[0], offset_minus_4); - w0[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 3: - w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4); - w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 4: - w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4); - w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - 
w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 5: - w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4); - w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 6: - w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4); - w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 7: - w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4); - w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w3[0] = 
amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 8: - w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4); - w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 9: - w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4); - w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 10: - w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4); - w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w3[0] = 
amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 11: - w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4); - w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 12: - w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4); - w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w3[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 13: - w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4); - w3[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - } -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; 
- - const u32 pw_l_len = pws[gid].pw_len; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_r_len = combs_buf[0].pw_len; - - u32x wordr0[4]; - - wordr0[0] = combs_buf[0].i[0]; - wordr0[1] = combs_buf[0].i[1]; - wordr0[2] = combs_buf[0].i[2]; - wordr0[3] = combs_buf[0].i[3]; - - u32x wordr1[4]; - - wordr1[0] = combs_buf[0].i[4]; - wordr1[1] = combs_buf[0].i[5]; - wordr1[2] = combs_buf[0].i[6]; - wordr1[3] = combs_buf[0].i[7]; - - u32x wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32x wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, pw_r_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_len = pw_l_len + pw_r_len; - - pws_amp[gid].i[ 0] = w0[0]; - pws_amp[gid].i[ 1] = 
w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/amd/amp_a3_v1.cl b/amd/amp_a3_v1.cl deleted file mode 100644 index f1b4ad5..0000000 --- a/amd/amp_a3_v1.cl +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE1 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 w0r = bfs_buf[0].i; - - pws_amp[gid].i[ 0] = w0[0] | w0r; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - 
pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/amd/amp_a3_v4.cl b/amd/amp_a3_v4.cl deleted file mode 100644 index 187d950..0000000 --- a/amd/amp_a3_v4.cl +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_amd.c" - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) amp (__global pw_t *pws, __global pw_t *pws_amp, __global gpu_rule_t *rules_buf, __global comb_t *combs_buf, __global bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 w0r = bfs_buf[0].i; - - pws_amp[gid].i[ 0] = w0[0] | w0r; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/amd/check_multi_vect1_comp4_warp.c b/amd/check_multi_vect1_comp4_warp.c deleted file 
mode 100644 index b512a71..0000000 --- a/amd/check_multi_vect1_comp4_warp.c +++ /dev/null @@ -1,34 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0; -digest_tp[1] = r1; -digest_tp[2] = r2; -digest_tp[3] = r3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_multi_vect1_comp4_warp_bs.c b/amd/check_multi_vect1_comp4_warp_bs.c deleted file mode 100644 index 735bee1..0000000 --- a/amd/check_multi_vect1_comp4_warp_bs.c +++ /dev/null @@ -1,34 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0; -digest_tp[1] = r1; -digest_tp[2] = r2; -digest_tp[3] = r3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos + slice, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + slice); - - d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_multi_vect2_comp4.c b/amd/check_multi_vect2_comp4.c deleted file mode 100644 index 7264c9e..0000000 --- a/amd/check_multi_vect2_comp4.c 
+++ /dev/null @@ -1,67 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.s0; -digest_tp[1] = r1.s0; -digest_tp[2] = r2.s0; -digest_tp[3] = r3.s0; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s1; -digest_tp[1] = r1.s1; -digest_tp[2] = r2.s1; -digest_tp[3] = r3.s1; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_multi_vect2_comp4_warp.c b/amd/check_multi_vect2_comp4_warp.c deleted file mode 100644 index b028667..0000000 --- a/amd/check_multi_vect2_comp4_warp.c +++ /dev/null @@ -1,67 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.s0; -digest_tp[1] = r1.s0; -digest_tp[2] = r2.s0; -digest_tp[3] = r3.s0; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, 
digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s1; -digest_tp[1] = r1.s1; -digest_tp[2] = r2.s1; -digest_tp[3] = r3.s1; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_multi_vect4_comp4.c b/amd/check_multi_vect4_comp4.c deleted file mode 100644 index ec104c6..0000000 --- a/amd/check_multi_vect4_comp4.c +++ /dev/null @@ -1,133 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.s0; -digest_tp[1] = r1.s0; -digest_tp[2] = r2.s0; -digest_tp[3] = r3.s0; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } 
-} - -digest_tp[0] = r0.s1; -digest_tp[1] = r1.s1; -digest_tp[2] = r2.s1; -digest_tp[3] = r3.s1; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s2; -digest_tp[1] = r1.s2; -digest_tp[2] = r2.s2; -digest_tp[3] = r3.s2; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s2 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s3; -digest_tp[1] = r1.s3; -digest_tp[2] = r2.s3; -digest_tp[3] = r3.s3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s3 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - 
d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_multi_vect4_comp4_warp.c b/amd/check_multi_vect4_comp4_warp.c deleted file mode 100644 index 00e732f..0000000 --- a/amd/check_multi_vect4_comp4_warp.c +++ /dev/null @@ -1,133 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.s0; -digest_tp[1] = r1.s0; -digest_tp[2] = r2.s0; -digest_tp[3] = r3.s0; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s1; -digest_tp[1] = r1.s1; -digest_tp[2] = r2.s1; -digest_tp[3] = r3.s1; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s2; -digest_tp[1] = r1.s2; -digest_tp[2] = r2.s2; -digest_tp[3] = r3.s2; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - 
bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 2) == 1)) - { - mark_hash_s2_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.s3; -digest_tp[1] = r1.s3; -digest_tp[2] = r2.s3; -digest_tp[3] = r3.s3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 3) == 1)) - { - mark_hash_s3_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/amd/check_single_vect1_comp4_warp.c b/amd/check_single_vect1_comp4_warp.c deleted file mode 100644 index 9672b3c..0000000 --- a/amd/check_single_vect1_comp4_warp.c +++ /dev/null @@ -1,14 +0,0 @@ -if ((r0 == search[0]) - && (r1 == search[1]) - && (r2 == search[2]) - && (r3 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/amd/check_single_vect1_comp4_warp_bs.c b/amd/check_single_vect1_comp4_warp_bs.c deleted file mode 100644 index 3d5729f..0000000 --- 
a/amd/check_single_vect1_comp4_warp_bs.c +++ /dev/null @@ -1,3 +0,0 @@ -mark_hash_s0_warp (plains_buf, hashes_shown, 0, gid, il_pos + slice); - -d_return_buf[lid] = 1; diff --git a/amd/check_single_vect2_comp4.c b/amd/check_single_vect2_comp4.c deleted file mode 100644 index 3ae8c7d..0000000 --- a/amd/check_single_vect2_comp4.c +++ /dev/null @@ -1,29 +0,0 @@ -if ((r0.s0 == search[0]) - && (r1.s0 == search[1]) - && (r2.s0 == search[2]) - && (r3.s0 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s1 == search[0]) - && (r1.s1 == search[1]) - && (r2.s1 == search[2]) - && (r3.s1 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/amd/check_single_vect2_comp4_warp.c b/amd/check_single_vect2_comp4_warp.c deleted file mode 100644 index 67c306e..0000000 --- a/amd/check_single_vect2_comp4_warp.c +++ /dev/null @@ -1,29 +0,0 @@ -if ((r0.s0 == search[0]) - && (r1.s0 == search[1]) - && (r2.s0 == search[2]) - && (r3.s0 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s1 == search[0]) - && (r1.s1 == search[1]) - && (r2.s1 == search[2]) - && (r3.s1 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - 
d_return_buf[lid] = 1; - } -} diff --git a/amd/check_single_vect4_comp4.c b/amd/check_single_vect4_comp4.c deleted file mode 100644 index 029a9a9..0000000 --- a/amd/check_single_vect4_comp4.c +++ /dev/null @@ -1,59 +0,0 @@ -if ((r0.s0 == search[0]) - && (r1.s0 == search[1]) - && (r2.s0 == search[2]) - && (r3.s0 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s1 == search[0]) - && (r1.s1 == search[1]) - && (r2.s1 == search[2]) - && (r3.s1 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s2 == search[0]) - && (r1.s2 == search[1]) - && (r2.s2 == search[2]) - && (r3.s2 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s2 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s3 == search[0]) - && (r1.s3 == search[1]) - && (r2.s3 == search[2]) - && (r3.s3 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomic_add (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s3 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/amd/check_single_vect4_comp4_warp.c b/amd/check_single_vect4_comp4_warp.c deleted file mode 100644 index 0e43782..0000000 --- a/amd/check_single_vect4_comp4_warp.c +++ /dev/null @@ -1,59 +0,0 @@ -if ((r0.s0 == search[0]) - && (r1.s0 == search[1]) - && (r2.s0 == search[2]) - && (r3.s0 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, 
bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s1 == search[0]) - && (r1.s1 == search[1]) - && (r2.s1 == search[2]) - && (r3.s1 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s2 == search[0]) - && (r1.s2 == search[1]) - && (r2.s2 == search[2]) - && (r3.s2 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 2) == 1)) - { - mark_hash_s2_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.s3 == search[0]) - && (r1.s3 == search[1]) - && (r2.s3 == search[2]) - && (r3.s3 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomic_add (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 3) == 1)) - { - mark_hash_s3_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/amd/common_amd.c b/amd/common_amd.c deleted file mode 100644 index 3ac5c85..0000000 --- a/amd/common_amd.c +++ /dev/null @@ -1,15303 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -static int device_memcmp (const u32 d1[4], __global u32 *d2) -{ - if (d1[3] > d2[DGST_R3]) return ( 1); - if (d1[3] < d2[DGST_R3]) return (-1); - if (d1[2] > d2[DGST_R2]) return ( 1); - if (d1[2] < d2[DGST_R2]) return (-1); - if (d1[1] > d2[DGST_R1]) return ( 1); - if (d1[1] < d2[DGST_R1]) return (-1); - if (d1[0] > d2[DGST_R0]) return ( 1); - if (d1[0] < d2[DGST_R0]) return (-1); - - return (0); -} - -static int find_hash (const 
u32 digest[4], const u32 digests_cnt, __global digest_t *digests_buf) -{ - for (u32 l = 0, r = digests_cnt; r; r >>= 1) - { - const u32 m = r >> 1; - - const u32 c = l + m; - - const int cmp = device_memcmp (digest, digests_buf[c].digest_buf); - - if (cmp > 0) - { - l += m + 1; - - r--; - } - - if (cmp == 0) return (c); - } - - return (-1); -} - -static u32 check_bitmap (__global u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest) -{ - return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f))); -} - -static u32 check (const u32 digest[2], __global u32 *bitmap_s1_a, __global u32 *bitmap_s1_b, __global u32 *bitmap_s1_c, __global u32 *bitmap_s1_d, __global u32 *bitmap_s2_a, __global u32 *bitmap_s2_b, __global u32 *bitmap_s2_c, __global u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2) -{ - if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0); - if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0); - if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0); - if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0); - - if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0); - if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0); - if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0); - if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0); - - return (1); -} - -#ifdef VECT_SIZE1 -static void mark_hash_s0 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 1) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s0_warp (__global plain_t *plains_buf, __global u32 
*hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 1) + 0; -} -#endif - -#ifdef VECT_SIZE2 -static void mark_hash_s0 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 2) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s1 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 2) + 1; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s0_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 2) + 0; -} - -static void mark_hash_s1_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 2) + 1; -} -#endif - -#ifdef VECT_SIZE4 -static void mark_hash_s0 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s1 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 1; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s2 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - 
hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 2; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s3 (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 3; - plains_buf[hash_pos].il_pos = il_pos; -} - -static void mark_hash_s0_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 0; -} - -static void mark_hash_s1_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 1; -} - -static void mark_hash_s2_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 2; -} - -static void mark_hash_s3_warp (__global plain_t *plains_buf, __global u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 3; -} -#endif - -/** - * scalar - */ - -static u32 swap_workaround (const u32 v) -{ - return (as_uint (as_uchar4 (v).s3210)); -} - -static u64 swap_workaround (const u64 v) -{ - return (as_ulong (as_uchar8 (v).s76543210)); -} - -static void truncate_block (u32 w[4], const u32 len) -{ - switch (len) - { - case 0: w[0] &= 0; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 1: w[0] &= 0x000000FF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 2: w[0] &= 0x0000FFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 3: 
w[0] &= 0x00FFFFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 4: w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 5: w[1] &= 0x000000FF; - w[2] &= 0; - w[3] &= 0; - break; - case 6: w[1] &= 0x0000FFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 7: w[1] &= 0x00FFFFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 8: w[2] &= 0; - w[3] &= 0; - break; - case 9: w[2] &= 0x000000FF; - w[3] &= 0; - break; - case 10: w[2] &= 0x0000FFFF; - w[3] &= 0; - break; - case 11: w[2] &= 0x00FFFFFF; - w[3] &= 0; - break; - case 12: w[3] &= 0; - break; - case 13: w[3] &= 0x000000FF; - break; - case 14: w[3] &= 0x0000FFFF; - break; - case 15: w[3] &= 0x00FFFFFF; - break; - } -} - -static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4]) -{ - out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF); - out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF); - out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF); - out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF); - out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF); - out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF); - out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF); - out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF); -} - -static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4]) -{ - out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8) - | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8); - out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8) - | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8); - out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8) - | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8); - out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8) - | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8); -} - -static void append_0x01_1 (u32 w0[4], 
const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } -} - -static void append_0x01_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - 
break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } -} - -static void append_0x01_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 
32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } -} - -static void append_0x01_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - 
w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } -} - -static void append_0x01_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 
offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: 
- w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - - case 64: - w4[0] = 0x01; - break; - - case 65: - w4[0] = w4[0] | 0x0100; - break; - - case 66: - w4[0] = w4[0] | 0x010000; - break; - - case 67: - w4[0] = w4[0] | 0x01000000; - break; - - case 68: - w4[1] = 0x01; - break; - - case 69: - w4[1] = w4[1] | 0x0100; - break; - - case 70: - w4[1] = w4[1] | 0x010000; - break; - - case 71: - w4[1] = w4[1] | 0x01000000; - break; - - case 72: - w4[2] = 0x01; - break; - - case 73: - w4[2] = w4[2] | 0x0100; - break; - - case 74: - w4[2] = w4[2] | 0x010000; - break; - - case 75: - w4[2] = w4[2] | 0x01000000; - break; - - case 76: - w4[3] = 0x01; - break; - - case 77: - w4[3] = w4[3] | 0x0100; - break; - - case 78: - w4[3] = w4[3] | 0x010000; - break; - - case 79: - w4[3] = w4[3] | 0x01000000; - break; - - case 80: - w5[0] = 0x01; - break; - - case 81: - w5[0] = w5[0] | 0x0100; - break; - - case 82: - w5[0] = w5[0] | 0x010000; - break; - - case 83: - w5[0] = w5[0] | 0x01000000; - break; - - case 84: - w5[1] = 0x01; - 
break; - - case 85: - w5[1] = w5[1] | 0x0100; - break; - - case 86: - w5[1] = w5[1] | 0x010000; - break; - - case 87: - w5[1] = w5[1] | 0x01000000; - break; - - case 88: - w5[2] = 0x01; - break; - - case 89: - w5[2] = w5[2] | 0x0100; - break; - - case 90: - w5[2] = w5[2] | 0x010000; - break; - - case 91: - w5[2] = w5[2] | 0x01000000; - break; - - case 92: - w5[3] = 0x01; - break; - - case 93: - w5[3] = w5[3] | 0x0100; - break; - - case 94: - w5[3] = w5[3] | 0x010000; - break; - - case 95: - w5[3] = w5[3] | 0x01000000; - break; - - case 96: - w6[0] = 0x01; - break; - - case 97: - w6[0] = w6[0] | 0x0100; - break; - - case 98: - w6[0] = w6[0] | 0x010000; - break; - - case 99: - w6[0] = w6[0] | 0x01000000; - break; - - case 100: - w6[1] = 0x01; - break; - - case 101: - w6[1] = w6[1] | 0x0100; - break; - - case 102: - w6[1] = w6[1] | 0x010000; - break; - - case 103: - w6[1] = w6[1] | 0x01000000; - break; - - case 104: - w6[2] = 0x01; - break; - - case 105: - w6[2] = w6[2] | 0x0100; - break; - - case 106: - w6[2] = w6[2] | 0x010000; - break; - - case 107: - w6[2] = w6[2] | 0x01000000; - break; - - case 108: - w6[3] = 0x01; - break; - - case 109: - w6[3] = w6[3] | 0x0100; - break; - - case 110: - w6[3] = w6[3] | 0x010000; - break; - - case 111: - w6[3] = w6[3] | 0x01000000; - break; - - case 112: - w7[0] = 0x01; - break; - - case 113: - w7[0] = w7[0] | 0x0100; - break; - - case 114: - w7[0] = w7[0] | 0x010000; - break; - - case 115: - w7[0] = w7[0] | 0x01000000; - break; - - case 116: - w7[1] = 0x01; - break; - - case 117: - w7[1] = w7[1] | 0x0100; - break; - - case 118: - w7[1] = w7[1] | 0x010000; - break; - - case 119: - w7[1] = w7[1] | 0x01000000; - break; - - case 120: - w7[2] = 0x01; - break; - - case 121: - w7[2] = w7[2] | 0x0100; - break; - - case 122: - w7[2] = w7[2] | 0x010000; - break; - - case 123: - w7[2] = w7[2] | 0x01000000; - break; - - case 124: - w7[3] = 0x01; - break; - - case 125: - w7[3] = w7[3] | 0x0100; - break; - - case 126: - w7[3] = w7[3] | 
0x010000; - break; - - case 127: - w7[3] = w7[3] | 0x01000000; - break; - } -} - -static void append_0x02_1 (u32 w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - } -} - -static void append_0x02_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] 
| 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - } -} - -static void append_0x02_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; 
- - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } -} - -static void append_0x02_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - 
case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - } -} - -static void 
append_0x02_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 
0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - - case 64: - w4[0] = 0x02; - break; - - case 65: - w4[0] = w4[0] | 0x0200; - break; - - case 66: - w4[0] = w4[0] | 0x020000; - break; - - case 67: - w4[0] = w4[0] | 0x02000000; - break; - - case 68: - w4[1] = 0x02; - break; - - case 69: - w4[1] = w4[1] | 0x0200; - break; - - case 70: - w4[1] = w4[1] | 0x020000; - break; - - case 71: - w4[1] = w4[1] | 0x02000000; - break; - - case 72: - w4[2] = 0x02; - break; - - case 73: - w4[2] = w4[2] | 0x0200; - break; - - case 74: - w4[2] = w4[2] | 0x020000; - break; - - case 75: - w4[2] = w4[2] | 0x02000000; - break; - - case 76: - w4[3] = 0x02; - break; - - case 77: - w4[3] = w4[3] | 0x0200; - break; - - case 78: - w4[3] = w4[3] | 0x020000; - break; - - case 79: - w4[3] = w4[3] | 0x02000000; - break; - - case 80: - w5[0] = 0x02; - break; - - case 81: - w5[0] = w5[0] | 0x0200; - break; - - case 82: 
- w5[0] = w5[0] | 0x020000; - break; - - case 83: - w5[0] = w5[0] | 0x02000000; - break; - - case 84: - w5[1] = 0x02; - break; - - case 85: - w5[1] = w5[1] | 0x0200; - break; - - case 86: - w5[1] = w5[1] | 0x020000; - break; - - case 87: - w5[1] = w5[1] | 0x02000000; - break; - - case 88: - w5[2] = 0x02; - break; - - case 89: - w5[2] = w5[2] | 0x0200; - break; - - case 90: - w5[2] = w5[2] | 0x020000; - break; - - case 91: - w5[2] = w5[2] | 0x02000000; - break; - - case 92: - w5[3] = 0x02; - break; - - case 93: - w5[3] = w5[3] | 0x0200; - break; - - case 94: - w5[3] = w5[3] | 0x020000; - break; - - case 95: - w5[3] = w5[3] | 0x02000000; - break; - - case 96: - w6[0] = 0x02; - break; - - case 97: - w6[0] = w6[0] | 0x0200; - break; - - case 98: - w6[0] = w6[0] | 0x020000; - break; - - case 99: - w6[0] = w6[0] | 0x02000000; - break; - - case 100: - w6[1] = 0x02; - break; - - case 101: - w6[1] = w6[1] | 0x0200; - break; - - case 102: - w6[1] = w6[1] | 0x020000; - break; - - case 103: - w6[1] = w6[1] | 0x02000000; - break; - - case 104: - w6[2] = 0x02; - break; - - case 105: - w6[2] = w6[2] | 0x0200; - break; - - case 106: - w6[2] = w6[2] | 0x020000; - break; - - case 107: - w6[2] = w6[2] | 0x02000000; - break; - - case 108: - w6[3] = 0x02; - break; - - case 109: - w6[3] = w6[3] | 0x0200; - break; - - case 110: - w6[3] = w6[3] | 0x020000; - break; - - case 111: - w6[3] = w6[3] | 0x02000000; - break; - - case 112: - w7[0] = 0x02; - break; - - case 113: - w7[0] = w7[0] | 0x0200; - break; - - case 114: - w7[0] = w7[0] | 0x020000; - break; - - case 115: - w7[0] = w7[0] | 0x02000000; - break; - - case 116: - w7[1] = 0x02; - break; - - case 117: - w7[1] = w7[1] | 0x0200; - break; - - case 118: - w7[1] = w7[1] | 0x020000; - break; - - case 119: - w7[1] = w7[1] | 0x02000000; - break; - - case 120: - w7[2] = 0x02; - break; - - case 121: - w7[2] = w7[2] | 0x0200; - break; - - case 122: - w7[2] = w7[2] | 0x020000; - break; - - case 123: - w7[2] = w7[2] | 0x02000000; - break; - - 
case 124: - w7[3] = 0x02; - break; - - case 125: - w7[3] = w7[3] | 0x0200; - break; - - case 126: - w7[3] = w7[3] | 0x020000; - break; - - case 127: - w7[3] = w7[3] | 0x02000000; - break; - } -} - -static void append_0x80_1 (u32 w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } -} - -static void append_0x80_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - 
- case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } -} - -static void append_0x80_2_be (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] |= 0x80000000; - break; - - case 1: - w0[0] |= 0x800000; - break; - - case 2: - w0[0] |= 0x8000; - break; - - case 3: - w0[0] |= 0x80; - break; - - case 4: - w0[1] |= 0x80000000; - break; - - case 5: - w0[1] |= 0x800000; - break; - - case 6: - w0[1] |= 0x8000; - break; - - case 7: - w0[1] |= 0x80; - break; - - case 8: - w0[2] |= 0x80000000; - break; - - case 9: - w0[2] |= 0x800000; - break; - - case 10: - w0[2] |= 0x8000; - break; - - case 11: - w0[2] |= 0x80; - break; - - case 12: - w0[3] |= 0x80000000; - break; - - case 13: - w0[3] |= 0x800000; - break; - - case 14: - w0[3] |= 0x8000; - break; - - case 15: - w0[3] |= 0x80; - break; - - case 16: - w1[0] |= 0x80000000; - break; - - case 17: - w1[0] |= 0x800000; - break; - - case 18: - w1[0] |= 0x8000; - break; - - case 19: - w1[0] |= 0x80; - break; - - case 20: - w1[1] |= 0x80000000; - break; - - case 21: - w1[1] |= 0x800000; - break; - - case 22: - w1[1] |= 0x8000; - break; - - case 23: - w1[1] |= 0x80; - break; - - case 24: - w1[2] |= 0x80000000; - break; - - case 25: - w1[2] |= 0x800000; - break; - - case 26: - w1[2] |= 0x8000; - break; - - case 27: - w1[2] |= 0x80; - break; - - case 28: - w1[3] |= 0x80000000; - break; - - case 29: - w1[3] |= 0x800000; - break; - - case 30: - w1[3] |= 
0x8000; - break; - - case 31: - w1[3] |= 0x80; - break; - } -} - -static void append_0x80_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - 
w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } -} - -static void append_0x80_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = 
w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } -} - -static void append_0x80_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 
0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: 
- w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - - case 64: - w4[0] = 0x80; - break; - - case 65: - w4[0] = w4[0] | 0x8000; - break; - - case 66: - w4[0] = w4[0] | 0x800000; - break; - - case 67: - w4[0] = w4[0] | 0x80000000; - break; - - case 68: - w4[1] = 0x80; - break; - - case 69: - w4[1] = w4[1] | 0x8000; - break; - - case 70: - w4[1] = w4[1] | 0x800000; - break; - - case 71: - w4[1] = w4[1] | 0x80000000; - break; - - case 72: - w4[2] = 0x80; - break; - - case 73: - w4[2] = w4[2] | 0x8000; - break; - - case 74: - w4[2] = w4[2] | 0x800000; - break; - - case 75: - w4[2] = w4[2] | 0x80000000; - break; - - case 76: - w4[3] = 0x80; - break; - - case 77: - w4[3] = w4[3] | 0x8000; - break; - - case 78: - w4[3] = w4[3] | 0x800000; - break; - - case 79: - w4[3] = w4[3] | 0x80000000; - break; - - case 80: - w5[0] = 0x80; - break; - - case 81: - w5[0] = w5[0] | 0x8000; - break; - - case 82: - w5[0] = w5[0] | 0x800000; - break; - - case 83: - w5[0] = w5[0] | 0x80000000; - break; - - case 84: - w5[1] = 0x80; - break; - - case 85: - w5[1] = w5[1] | 0x8000; - break; - - case 86: - w5[1] = w5[1] | 0x800000; - break; - - case 87: - w5[1] = w5[1] | 0x80000000; - break; - - case 88: - w5[2] = 0x80; - break; - - case 89: - w5[2] = w5[2] | 0x8000; - break; - - case 90: - w5[2] = w5[2] | 0x800000; - break; - - case 91: - w5[2] = w5[2] | 
0x80000000; - break; - - case 92: - w5[3] = 0x80; - break; - - case 93: - w5[3] = w5[3] | 0x8000; - break; - - case 94: - w5[3] = w5[3] | 0x800000; - break; - - case 95: - w5[3] = w5[3] | 0x80000000; - break; - - case 96: - w6[0] = 0x80; - break; - - case 97: - w6[0] = w6[0] | 0x8000; - break; - - case 98: - w6[0] = w6[0] | 0x800000; - break; - - case 99: - w6[0] = w6[0] | 0x80000000; - break; - - case 100: - w6[1] = 0x80; - break; - - case 101: - w6[1] = w6[1] | 0x8000; - break; - - case 102: - w6[1] = w6[1] | 0x800000; - break; - - case 103: - w6[1] = w6[1] | 0x80000000; - break; - - case 104: - w6[2] = 0x80; - break; - - case 105: - w6[2] = w6[2] | 0x8000; - break; - - case 106: - w6[2] = w6[2] | 0x800000; - break; - - case 107: - w6[2] = w6[2] | 0x80000000; - break; - - case 108: - w6[3] = 0x80; - break; - - case 109: - w6[3] = w6[3] | 0x8000; - break; - - case 110: - w6[3] = w6[3] | 0x800000; - break; - - case 111: - w6[3] = w6[3] | 0x80000000; - break; - - case 112: - w7[0] = 0x80; - break; - - case 113: - w7[0] = w7[0] | 0x8000; - break; - - case 114: - w7[0] = w7[0] | 0x800000; - break; - - case 115: - w7[0] = w7[0] | 0x80000000; - break; - - case 116: - w7[1] = 0x80; - break; - - case 117: - w7[1] = w7[1] | 0x8000; - break; - - case 118: - w7[1] = w7[1] | 0x800000; - break; - - case 119: - w7[1] = w7[1] | 0x80000000; - break; - - case 120: - w7[2] = 0x80; - break; - - case 121: - w7[2] = w7[2] | 0x8000; - break; - - case 122: - w7[2] = w7[2] | 0x800000; - break; - - case 123: - w7[2] = w7[2] | 0x80000000; - break; - - case 124: - w7[3] = 0x80; - break; - - case 125: - w7[3] = w7[3] | 0x8000; - break; - - case 126: - w7[3] = w7[3] | 0x800000; - break; - - case 127: - w7[3] = w7[3] | 0x80000000; - break; - } -} - -static void append_0x80_4 (u32 w[16], const u32 offset) -{ - switch (offset) - { - case 0: - w[ 0] = 0x80; - break; - - case 1: - w[ 0] = w[ 0] | 0x8000; - break; - - case 2: - w[ 0] = w[ 0] | 0x800000; - break; - - case 3: - w[ 0] = w[ 0] | 
0x80000000; - break; - - case 4: - w[ 1] = 0x80; - break; - - case 5: - w[ 1] = w[ 1] | 0x8000; - break; - - case 6: - w[ 1] = w[ 1] | 0x800000; - break; - - case 7: - w[ 1] = w[ 1] | 0x80000000; - break; - - case 8: - w[ 2] = 0x80; - break; - - case 9: - w[ 2] = w[ 2] | 0x8000; - break; - - case 10: - w[ 2] = w[ 2] | 0x800000; - break; - - case 11: - w[ 2] = w[ 2] | 0x80000000; - break; - - case 12: - w[ 3] = 0x80; - break; - - case 13: - w[ 3] = w[ 3] | 0x8000; - break; - - case 14: - w[ 3] = w[ 3] | 0x800000; - break; - - case 15: - w[ 3] = w[ 3] | 0x80000000; - break; - - case 16: - w[ 4] = 0x80; - break; - - case 17: - w[ 4] = w[ 4] | 0x8000; - break; - - case 18: - w[ 4] = w[ 4] | 0x800000; - break; - - case 19: - w[ 4] = w[ 4] | 0x80000000; - break; - - case 20: - w[ 5] = 0x80; - break; - - case 21: - w[ 5] = w[ 5] | 0x8000; - break; - - case 22: - w[ 5] = w[ 5] | 0x800000; - break; - - case 23: - w[ 5] = w[ 5] | 0x80000000; - break; - - case 24: - w[ 6] = 0x80; - break; - - case 25: - w[ 6] = w[ 6] | 0x8000; - break; - - case 26: - w[ 6] = w[ 6] | 0x800000; - break; - - case 27: - w[ 6] = w[ 6] | 0x80000000; - break; - - case 28: - w[ 7] = 0x80; - break; - - case 29: - w[ 7] = w[ 7] | 0x8000; - break; - - case 30: - w[ 7] = w[ 7] | 0x800000; - break; - - case 31: - w[ 7] = w[ 7] | 0x80000000; - break; - - case 32: - w[ 8] = 0x80; - break; - - case 33: - w[ 8] = w[ 8] | 0x8000; - break; - - case 34: - w[ 8] = w[ 8] | 0x800000; - break; - - case 35: - w[ 8] = w[ 8] | 0x80000000; - break; - - case 36: - w[ 9] = 0x80; - break; - - case 37: - w[ 9] = w[ 9] | 0x8000; - break; - - case 38: - w[ 9] = w[ 9] | 0x800000; - break; - - case 39: - w[ 9] = w[ 9] | 0x80000000; - break; - - case 40: - w[10] = 0x80; - break; - - case 41: - w[10] = w[10] | 0x8000; - break; - - case 42: - w[10] = w[10] | 0x800000; - break; - - case 43: - w[10] = w[10] | 0x80000000; - break; - - case 44: - w[11] = 0x80; - break; - - case 45: - w[11] = w[11] | 0x8000; - break; - - case 46: - 
w[11] = w[11] | 0x800000; - break; - - case 47: - w[11] = w[11] | 0x80000000; - break; - - case 48: - w[12] = 0x80; - break; - - case 49: - w[12] = w[12] | 0x8000; - break; - - case 50: - w[12] = w[12] | 0x800000; - break; - - case 51: - w[12] = w[12] | 0x80000000; - break; - - case 52: - w[13] = 0x80; - break; - - case 53: - w[13] = w[13] | 0x8000; - break; - - case 54: - w[13] = w[13] | 0x800000; - break; - - case 55: - w[13] = w[13] | 0x80000000; - break; - - case 56: - w[14] = 0x80; - break; - - case 57: - w[14] = w[14] | 0x8000; - break; - - case 58: - w[14] = w[14] | 0x800000; - break; - - case 59: - w[14] = w[14] | 0x80000000; - break; - - case 60: - w[15] = 0x80; - break; - - case 61: - w[15] = w[15] | 0x8000; - break; - - case 62: - w[15] = w[15] | 0x800000; - break; - - case 63: - w[15] = w[15] | 0x80000000; - break; - } -} - -static void append_0x80_8 (u32 w[32], const u32 offset) -{ - switch (offset) - { - case 0: - w[ 0] = 0x80; - break; - - case 1: - w[ 0] = w[ 0] | 0x8000; - break; - - case 2: - w[ 0] = w[ 0] | 0x800000; - break; - - case 3: - w[ 0] = w[ 0] | 0x80000000; - break; - - case 4: - w[ 1] = 0x80; - break; - - case 5: - w[ 1] = w[ 1] | 0x8000; - break; - - case 6: - w[ 1] = w[ 1] | 0x800000; - break; - - case 7: - w[ 1] = w[ 1] | 0x80000000; - break; - - case 8: - w[ 2] = 0x80; - break; - - case 9: - w[ 2] = w[ 2] | 0x8000; - break; - - case 10: - w[ 2] = w[ 2] | 0x800000; - break; - - case 11: - w[ 2] = w[ 2] | 0x80000000; - break; - - case 12: - w[ 3] = 0x80; - break; - - case 13: - w[ 3] = w[ 3] | 0x8000; - break; - - case 14: - w[ 3] = w[ 3] | 0x800000; - break; - - case 15: - w[ 3] = w[ 3] | 0x80000000; - break; - - case 16: - w[ 4] = 0x80; - break; - - case 17: - w[ 4] = w[ 4] | 0x8000; - break; - - case 18: - w[ 4] = w[ 4] | 0x800000; - break; - - case 19: - w[ 4] = w[ 4] | 0x80000000; - break; - - case 20: - w[ 5] = 0x80; - break; - - case 21: - w[ 5] = w[ 5] | 0x8000; - break; - - case 22: - w[ 5] = w[ 5] | 0x800000; - break; - - 
case 23: - w[ 5] = w[ 5] | 0x80000000; - break; - - case 24: - w[ 6] = 0x80; - break; - - case 25: - w[ 6] = w[ 6] | 0x8000; - break; - - case 26: - w[ 6] = w[ 6] | 0x800000; - break; - - case 27: - w[ 6] = w[ 6] | 0x80000000; - break; - - case 28: - w[ 7] = 0x80; - break; - - case 29: - w[ 7] = w[ 7] | 0x8000; - break; - - case 30: - w[ 7] = w[ 7] | 0x800000; - break; - - case 31: - w[ 7] = w[ 7] | 0x80000000; - break; - - case 32: - w[ 8] = 0x80; - break; - - case 33: - w[ 8] = w[ 8] | 0x8000; - break; - - case 34: - w[ 8] = w[ 8] | 0x800000; - break; - - case 35: - w[ 8] = w[ 8] | 0x80000000; - break; - - case 36: - w[ 9] = 0x80; - break; - - case 37: - w[ 9] = w[ 9] | 0x8000; - break; - - case 38: - w[ 9] = w[ 9] | 0x800000; - break; - - case 39: - w[ 9] = w[ 9] | 0x80000000; - break; - - case 40: - w[10] = 0x80; - break; - - case 41: - w[10] = w[10] | 0x8000; - break; - - case 42: - w[10] = w[10] | 0x800000; - break; - - case 43: - w[10] = w[10] | 0x80000000; - break; - - case 44: - w[11] = 0x80; - break; - - case 45: - w[11] = w[11] | 0x8000; - break; - - case 46: - w[11] = w[11] | 0x800000; - break; - - case 47: - w[11] = w[11] | 0x80000000; - break; - - case 48: - w[12] = 0x80; - break; - - case 49: - w[12] = w[12] | 0x8000; - break; - - case 50: - w[12] = w[12] | 0x800000; - break; - - case 51: - w[12] = w[12] | 0x80000000; - break; - - case 52: - w[13] = 0x80; - break; - - case 53: - w[13] = w[13] | 0x8000; - break; - - case 54: - w[13] = w[13] | 0x800000; - break; - - case 55: - w[13] = w[13] | 0x80000000; - break; - - case 56: - w[14] = 0x80; - break; - - case 57: - w[14] = w[14] | 0x8000; - break; - - case 58: - w[14] = w[14] | 0x800000; - break; - - case 59: - w[14] = w[14] | 0x80000000; - break; - - case 60: - w[15] = 0x80; - break; - - case 61: - w[15] = w[15] | 0x8000; - break; - - case 62: - w[15] = w[15] | 0x800000; - break; - - case 63: - w[15] = w[15] | 0x80000000; - break; - - case 64: - w[16] = 0x80; - break; - - case 65: - w[16] = w[16] | 
0x8000; - break; - - case 66: - w[16] = w[16] | 0x800000; - break; - - case 67: - w[16] = w[16] | 0x80000000; - break; - - case 68: - w[17] = 0x80; - break; - - case 69: - w[17] = w[17] | 0x8000; - break; - - case 70: - w[17] = w[17] | 0x800000; - break; - - case 71: - w[17] = w[17] | 0x80000000; - break; - - case 72: - w[18] = 0x80; - break; - - case 73: - w[18] = w[18] | 0x8000; - break; - - case 74: - w[18] = w[18] | 0x800000; - break; - - case 75: - w[18] = w[18] | 0x80000000; - break; - - case 76: - w[19] = 0x80; - break; - - case 77: - w[19] = w[19] | 0x8000; - break; - - case 78: - w[19] = w[19] | 0x800000; - break; - - case 79: - w[19] = w[19] | 0x80000000; - break; - - case 80: - w[20] = 0x80; - break; - - case 81: - w[20] = w[20] | 0x8000; - break; - - case 82: - w[20] = w[20] | 0x800000; - break; - - case 83: - w[20] = w[20] | 0x80000000; - break; - - case 84: - w[21] = 0x80; - break; - - case 85: - w[21] = w[21] | 0x8000; - break; - - case 86: - w[21] = w[21] | 0x800000; - break; - - case 87: - w[21] = w[21] | 0x80000000; - break; - - case 88: - w[22] = 0x80; - break; - - case 89: - w[22] = w[22] | 0x8000; - break; - - case 90: - w[22] = w[22] | 0x800000; - break; - - case 91: - w[22] = w[22] | 0x80000000; - break; - - case 92: - w[23] = 0x80; - break; - - case 93: - w[23] = w[23] | 0x8000; - break; - - case 94: - w[23] = w[23] | 0x800000; - break; - - case 95: - w[23] = w[23] | 0x80000000; - break; - - case 96: - w[24] = 0x80; - break; - - case 97: - w[24] = w[24] | 0x8000; - break; - - case 98: - w[24] = w[24] | 0x800000; - break; - - case 99: - w[24] = w[24] | 0x80000000; - break; - - case 100: - w[25] = 0x80; - break; - - case 101: - w[25] = w[25] | 0x8000; - break; - - case 102: - w[25] = w[25] | 0x800000; - break; - - case 103: - w[25] = w[25] | 0x80000000; - break; - - case 104: - w[26] = 0x80; - break; - - case 105: - w[26] = w[26] | 0x8000; - break; - - case 106: - w[26] = w[26] | 0x800000; - break; - - case 107: - w[26] = w[26] | 0x80000000; - 
break; - - case 108: - w[27] = 0x80; - break; - - case 109: - w[27] = w[27] | 0x8000; - break; - - case 110: - w[27] = w[27] | 0x800000; - break; - - case 111: - w[27] = w[27] | 0x80000000; - break; - - case 112: - w[28] = 0x80; - break; - - case 113: - w[28] = w[28] | 0x8000; - break; - - case 114: - w[28] = w[28] | 0x800000; - break; - - case 115: - w[28] = w[28] | 0x80000000; - break; - - case 116: - w[29] = 0x80; - break; - - case 117: - w[29] = w[29] | 0x8000; - break; - - case 118: - w[29] = w[29] | 0x800000; - break; - - case 119: - w[29] = w[29] | 0x80000000; - break; - - case 120: - w[30] = 0x80; - break; - - case 121: - w[30] = w[30] | 0x8000; - break; - - case 122: - w[30] = w[30] | 0x800000; - break; - - case 123: - w[30] = w[30] | 0x80000000; - break; - - case 124: - w[31] = 0x80; - break; - - case 125: - w[31] = w[31] | 0x8000; - break; - - case 126: - w[31] = w[31] | 0x800000; - break; - - case 127: - w[31] = w[31] | 0x80000000; - break; - } -} - -static void device_memcat2L (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -static void device_memcat4L (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] 
| src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat8L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = 
src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; 
- - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = 
src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat12L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = 
src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - 
break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | 
src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = 
src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat12L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4]) -{ - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[0] = src_r1[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 
16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[0] = src_r1[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[0] = src_r1[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - dst2[0] = src_r1[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[1] = src_r1[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[1] = src_r1[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | 
src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[1] = src_r1[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - dst2[0] = src_r1[2]; - dst2[1] = src_r1[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[2] = src_r1[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[2] = src_r1[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[2] = src_r1[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - dst2[0] = src_r1[1]; - dst2[1] = src_r1[2]; - dst2[2] = src_r1[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - 
dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[3] = src_r1[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[3] = src_r1[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[3] = src_r1[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - dst2[0] = src_r1[0]; - dst2[1] = src_r1[1]; - dst2[2] = src_r1[2]; - dst2[3] = src_r1[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[1] = src_r1[0] 
>> 16 | src_r1[1] << 16; - dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24; - break; - - case 20: - dst1[1] = src_r1[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - dst2[1] = src_r1[0]; - dst2[2] = src_r1[1]; - dst2[3] = src_r1[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24; - break; - - case 24: - dst1[2] = src_r1[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - dst2[2] = src_r1[0]; - dst2[3] = src_r1[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] 
>> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24; - break; - - case 28: - dst1[3] = src_r1[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - dst2[3] = src_r1[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | 
src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -static void memcat16_9 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] 
<< 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - 
case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] 
<< 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 
24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -static void memcat32_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | 
append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] 
= append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] 
= append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24; - break; - - case 
22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | 
append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -static void memcat32_9 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; 
- w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] 
= append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 
11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - 
w3[0] = append2[0] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - w3[0] = append2[0]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24 | append2[0] << 8; - w3[1] = append2[0] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16 | append2[0] << 16; - w3[1] = append2[0] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8 | append2[0] << 24; - w3[1] = append2[0] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - w3[1] = append2[0]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = 
append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24 | append2[0] << 8; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16 | append2[0] << 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8 | append2[0] << 24; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - 
w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 
4 - offset; - - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4); - w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4); - w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[0] = amd_bytealign (w0[0], 0, offset_minus_4); - - if (offset_mod_4 == 0) - { - w0[0] = w0[1]; - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 1: - w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4); - w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[0] = 0; - - if 
(offset_mod_4 == 0) - { - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 2: - w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4); - w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 3: - w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4); - w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[2] = 0; - w0[1] = 0; - 
w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 4: - w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4); - w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 5: - w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4); - w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - 
break; - - case 6: - w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4); - w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 7: - w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4); - w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 8: - w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4); - w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; 
- w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 9: - w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4); - w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 10: - w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4); - w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 11: - w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4); - w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 12: - w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4); - w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w3[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[3] = 0; - w2[2] = 
0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 13: - w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4); - w3[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - } -} - -static void switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign (w3[1], 0, offset); - w3[1] = amd_bytealign (w3[0], w3[1], offset); - w3[0] = amd_bytealign (w2[3], w3[0], offset); - w2[3] = amd_bytealign (w2[2], w2[3], offset); - w2[2] = amd_bytealign (w2[1], w2[2], offset); - w2[1] = amd_bytealign (w2[0], w2[1], offset); - w2[0] = amd_bytealign (w1[3], w2[0], offset); - w1[3] = amd_bytealign (w1[2], w1[3], offset); - w1[2] = amd_bytealign (w1[1], w1[2], offset); - w1[1] = amd_bytealign (w1[0], w1[1], offset); - w1[0] = amd_bytealign (w0[3], w1[0], offset); - w0[3] = amd_bytealign (w0[2], w0[3], offset); - w0[2] = amd_bytealign (w0[1], w0[2], offset); - w0[1] = amd_bytealign (w0[0], w0[1], offset); - w0[0] = amd_bytealign ( 0, w0[0], offset); - break; - - case 1: - w3[2] = amd_bytealign (w3[0], 0, offset); - w3[1] = amd_bytealign (w2[3], w3[0], offset); - w3[0] = amd_bytealign (w2[2], w2[3], offset); - w2[3] = amd_bytealign (w2[1], w2[2], offset); - w2[2] = amd_bytealign (w2[0], w2[1], offset); - w2[1] = amd_bytealign (w1[3], w2[0], offset); - w2[0] = amd_bytealign (w1[2], w1[3], offset); - w1[3] = amd_bytealign (w1[1], w1[2], offset); - w1[2] = amd_bytealign (w1[0], w1[1], offset); - w1[1] = amd_bytealign (w0[3], w1[0], offset); - w1[0] = amd_bytealign (w0[2], w0[3], offset); - 
w0[3] = amd_bytealign (w0[1], w0[2], offset); - w0[2] = amd_bytealign (w0[0], w0[1], offset); - w0[1] = amd_bytealign ( 0, w0[0], offset); - w0[0] = 0; - break; - - case 2: - w3[2] = amd_bytealign (w2[3], 0, offset); - w3[1] = amd_bytealign (w2[2], w2[3], offset); - w3[0] = amd_bytealign (w2[1], w2[2], offset); - w2[3] = amd_bytealign (w2[0], w2[1], offset); - w2[2] = amd_bytealign (w1[3], w2[0], offset); - w2[1] = amd_bytealign (w1[2], w1[3], offset); - w2[0] = amd_bytealign (w1[1], w1[2], offset); - w1[3] = amd_bytealign (w1[0], w1[1], offset); - w1[2] = amd_bytealign (w0[3], w1[0], offset); - w1[1] = amd_bytealign (w0[2], w0[3], offset); - w1[0] = amd_bytealign (w0[1], w0[2], offset); - w0[3] = amd_bytealign (w0[0], w0[1], offset); - w0[2] = amd_bytealign ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w3[2] = amd_bytealign (w2[2], 0, offset); - w3[1] = amd_bytealign (w2[1], w2[2], offset); - w3[0] = amd_bytealign (w2[0], w2[1], offset); - w2[3] = amd_bytealign (w1[3], w2[0], offset); - w2[2] = amd_bytealign (w1[2], w1[3], offset); - w2[1] = amd_bytealign (w1[1], w1[2], offset); - w2[0] = amd_bytealign (w1[0], w1[1], offset); - w1[3] = amd_bytealign (w0[3], w1[0], offset); - w1[2] = amd_bytealign (w0[2], w0[3], offset); - w1[1] = amd_bytealign (w0[1], w0[2], offset); - w1[0] = amd_bytealign (w0[0], w0[1], offset); - w0[3] = amd_bytealign ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w3[2] = amd_bytealign (w2[1], 0, offset); - w3[1] = amd_bytealign (w2[0], w2[1], offset); - w3[0] = amd_bytealign (w1[3], w2[0], offset); - w2[3] = amd_bytealign (w1[2], w1[3], offset); - w2[2] = amd_bytealign (w1[1], w1[2], offset); - w2[1] = amd_bytealign (w1[0], w1[1], offset); - w2[0] = amd_bytealign (w0[3], w1[0], offset); - w1[3] = amd_bytealign (w0[2], w0[3], offset); - w1[2] = amd_bytealign (w0[1], w0[2], offset); - w1[1] = amd_bytealign (w0[0], w0[1], offset); - w1[0] = amd_bytealign ( 0, w0[0], offset); - w0[3] = 0; 
- w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w3[2] = amd_bytealign (w2[0], 0, offset); - w3[1] = amd_bytealign (w1[3], w2[0], offset); - w3[0] = amd_bytealign (w1[2], w1[3], offset); - w2[3] = amd_bytealign (w1[1], w1[2], offset); - w2[2] = amd_bytealign (w1[0], w1[1], offset); - w2[1] = amd_bytealign (w0[3], w1[0], offset); - w2[0] = amd_bytealign (w0[2], w0[3], offset); - w1[3] = amd_bytealign (w0[1], w0[2], offset); - w1[2] = amd_bytealign (w0[0], w0[1], offset); - w1[1] = amd_bytealign ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w3[2] = amd_bytealign (w1[3], 0, offset); - w3[1] = amd_bytealign (w1[2], w1[3], offset); - w3[0] = amd_bytealign (w1[1], w1[2], offset); - w2[3] = amd_bytealign (w1[0], w1[1], offset); - w2[2] = amd_bytealign (w0[3], w1[0], offset); - w2[1] = amd_bytealign (w0[2], w0[3], offset); - w2[0] = amd_bytealign (w0[1], w0[2], offset); - w1[3] = amd_bytealign (w0[0], w0[1], offset); - w1[2] = amd_bytealign ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w3[2] = amd_bytealign (w1[2], 0, offset); - w3[1] = amd_bytealign (w1[1], w1[2], offset); - w3[0] = amd_bytealign (w1[0], w1[1], offset); - w2[3] = amd_bytealign (w0[3], w1[0], offset); - w2[2] = amd_bytealign (w0[2], w0[3], offset); - w2[1] = amd_bytealign (w0[1], w0[2], offset); - w2[0] = amd_bytealign (w0[0], w0[1], offset); - w1[3] = amd_bytealign ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w3[2] = amd_bytealign (w1[1], 0, offset); - w3[1] = amd_bytealign (w1[0], w1[1], offset); - w3[0] = amd_bytealign (w0[3], w1[0], offset); - w2[3] = amd_bytealign (w0[2], w0[3], offset); - w2[2] = amd_bytealign (w0[1], w0[2], offset); - w2[1] = amd_bytealign (w0[0], w0[1], offset); - w2[0] = amd_bytealign ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; 
- w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w3[2] = amd_bytealign (w1[0], 0, offset); - w3[1] = amd_bytealign (w0[3], w1[0], offset); - w3[0] = amd_bytealign (w0[2], w0[3], offset); - w2[3] = amd_bytealign (w0[1], w0[2], offset); - w2[2] = amd_bytealign (w0[0], w0[1], offset); - w2[1] = amd_bytealign ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w3[2] = amd_bytealign (w0[3], 0, offset); - w3[1] = amd_bytealign (w0[2], w0[3], offset); - w3[0] = amd_bytealign (w0[1], w0[2], offset); - w2[3] = amd_bytealign (w0[0], w0[1], offset); - w2[2] = amd_bytealign ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w3[2] = amd_bytealign (w0[2], 0, offset); - w3[1] = amd_bytealign (w0[1], w0[2], offset); - w3[0] = amd_bytealign (w0[0], w0[1], offset); - w2[3] = amd_bytealign ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w3[2] = amd_bytealign (w0[1], 0, offset); - w3[1] = amd_bytealign (w0[0], w0[1], offset); - w3[0] = amd_bytealign ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w3[2] = amd_bytealign (w0[0], 0, offset); - w3[1] = amd_bytealign ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } -} - -/** - * vector - */ - -#ifndef VECT_SIZE1 -static u32x swap_workaround (const u32x v) -{ - return rotl32 ((v & 0x00FF00FF), 24u) - | rotl32 ((v & 0xFF00FF00), 8u); -} 
- -static u64x swap_workaround (const u64x v) -{ - return (((v & 0xff00000000000000) >> 56) - | ((v & 0x00ff000000000000) >> 40) - | ((v & 0x0000ff0000000000) >> 24) - | ((v & 0x000000ff00000000) >> 8) - | ((v & 0x00000000ff000000) << 8) - | ((v & 0x0000000000ff0000) << 24) - | ((v & 0x000000000000ff00) << 40) - | ((v & 0x00000000000000ff) << 56)); -} - -static void truncate_block (u32x w[4], const u32 len) -{ - switch (len) - { - case 0: w[0] &= 0; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 1: w[0] &= 0x000000FF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 2: w[0] &= 0x0000FFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 3: w[0] &= 0x00FFFFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 4: w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 5: w[1] &= 0x000000FF; - w[2] &= 0; - w[3] &= 0; - break; - case 6: w[1] &= 0x0000FFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 7: w[1] &= 0x00FFFFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 8: w[2] &= 0; - w[3] &= 0; - break; - case 9: w[2] &= 0x000000FF; - w[3] &= 0; - break; - case 10: w[2] &= 0x0000FFFF; - w[3] &= 0; - break; - case 11: w[2] &= 0x00FFFFFF; - w[3] &= 0; - break; - case 12: w[3] &= 0; - break; - case 13: w[3] &= 0x000000FF; - break; - case 14: w[3] &= 0x0000FFFF; - break; - case 15: w[3] &= 0x00FFFFFF; - break; - } -} - -static void make_unicode (const u32x in[4], u32x out1[4], u32x out2[4]) -{ - out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF); - out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF); - out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF); - out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF); - out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF); - out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF); - out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF); - out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF); -} 
- -static void append_0x01_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } -} - -static void append_0x01_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - 
case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } -} - -static void append_0x01_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - 
w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } -} - -static void append_0x01_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - 
w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } -} - -static void append_0x01_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x 
w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - 
break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - - case 64: - w4[0] = 0x01; - break; - - case 65: - w4[0] = w4[0] | 0x0100; - break; - - case 66: - w4[0] = w4[0] | 0x010000; - break; - - case 67: - w4[0] = w4[0] | 0x01000000; - break; - - case 68: - w4[1] = 0x01; - break; - - case 69: - w4[1] = w4[1] | 0x0100; - break; - - case 70: - w4[1] = w4[1] | 0x010000; - break; - - case 71: - w4[1] = w4[1] | 0x01000000; - break; - - case 72: - w4[2] = 0x01; - break; - - case 73: - w4[2] = w4[2] | 0x0100; - break; - - case 74: - w4[2] = w4[2] | 0x010000; - break; - - case 75: - w4[2] = w4[2] | 0x01000000; - break; - - case 76: - w4[3] = 0x01; - break; - - case 77: - w4[3] = w4[3] | 0x0100; - break; - - case 78: - w4[3] = w4[3] | 0x010000; - break; - - case 79: - w4[3] = w4[3] | 0x01000000; - break; - - case 80: - w5[0] = 0x01; - break; - - case 81: - w5[0] = w5[0] | 0x0100; - break; - - case 82: - w5[0] = w5[0] | 0x010000; - break; - - case 83: - 
w5[0] = w5[0] | 0x01000000; - break; - - case 84: - w5[1] = 0x01; - break; - - case 85: - w5[1] = w5[1] | 0x0100; - break; - - case 86: - w5[1] = w5[1] | 0x010000; - break; - - case 87: - w5[1] = w5[1] | 0x01000000; - break; - - case 88: - w5[2] = 0x01; - break; - - case 89: - w5[2] = w5[2] | 0x0100; - break; - - case 90: - w5[2] = w5[2] | 0x010000; - break; - - case 91: - w5[2] = w5[2] | 0x01000000; - break; - - case 92: - w5[3] = 0x01; - break; - - case 93: - w5[3] = w5[3] | 0x0100; - break; - - case 94: - w5[3] = w5[3] | 0x010000; - break; - - case 95: - w5[3] = w5[3] | 0x01000000; - break; - - case 96: - w6[0] = 0x01; - break; - - case 97: - w6[0] = w6[0] | 0x0100; - break; - - case 98: - w6[0] = w6[0] | 0x010000; - break; - - case 99: - w6[0] = w6[0] | 0x01000000; - break; - - case 100: - w6[1] = 0x01; - break; - - case 101: - w6[1] = w6[1] | 0x0100; - break; - - case 102: - w6[1] = w6[1] | 0x010000; - break; - - case 103: - w6[1] = w6[1] | 0x01000000; - break; - - case 104: - w6[2] = 0x01; - break; - - case 105: - w6[2] = w6[2] | 0x0100; - break; - - case 106: - w6[2] = w6[2] | 0x010000; - break; - - case 107: - w6[2] = w6[2] | 0x01000000; - break; - - case 108: - w6[3] = 0x01; - break; - - case 109: - w6[3] = w6[3] | 0x0100; - break; - - case 110: - w6[3] = w6[3] | 0x010000; - break; - - case 111: - w6[3] = w6[3] | 0x01000000; - break; - - case 112: - w7[0] = 0x01; - break; - - case 113: - w7[0] = w7[0] | 0x0100; - break; - - case 114: - w7[0] = w7[0] | 0x010000; - break; - - case 115: - w7[0] = w7[0] | 0x01000000; - break; - - case 116: - w7[1] = 0x01; - break; - - case 117: - w7[1] = w7[1] | 0x0100; - break; - - case 118: - w7[1] = w7[1] | 0x010000; - break; - - case 119: - w7[1] = w7[1] | 0x01000000; - break; - - case 120: - w7[2] = 0x01; - break; - - case 121: - w7[2] = w7[2] | 0x0100; - break; - - case 122: - w7[2] = w7[2] | 0x010000; - break; - - case 123: - w7[2] = w7[2] | 0x01000000; - break; - - case 124: - w7[3] = 0x01; - break; - - case 125: - 
w7[3] = w7[3] | 0x0100; - break; - - case 126: - w7[3] = w7[3] | 0x010000; - break; - - case 127: - w7[3] = w7[3] | 0x01000000; - break; - } -} - -static void append_0x02_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - } -} - -static void append_0x02_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - 
break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - } -} - -static void append_0x02_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 
28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } -} - -static void append_0x02_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: 
- w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - 
break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - } -} - -static void append_0x02_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - 
break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - - case 64: - w4[0] = 0x02; - break; - - case 65: - w4[0] = w4[0] | 0x0200; - break; - - case 66: - w4[0] = w4[0] | 0x020000; - break; - - case 67: - w4[0] = w4[0] | 0x02000000; - break; - - case 68: - w4[1] = 0x02; - break; - - case 69: - w4[1] = w4[1] | 0x0200; - break; - - case 70: - w4[1] = w4[1] | 0x020000; - break; - - case 71: - w4[1] = w4[1] | 0x02000000; - break; - - case 72: - w4[2] = 0x02; - break; - - case 73: - w4[2] = w4[2] | 0x0200; - break; - - case 74: - w4[2] = w4[2] | 0x020000; - break; - - case 75: - w4[2] = w4[2] | 0x02000000; - break; - - case 76: - w4[3] = 0x02; - break; - - case 77: - w4[3] = w4[3] | 0x0200; - break; - - case 78: - w4[3] = w4[3] | 0x020000; - break; - - case 79: - w4[3] = w4[3] | 0x02000000; - break; - - case 
80: - w5[0] = 0x02; - break; - - case 81: - w5[0] = w5[0] | 0x0200; - break; - - case 82: - w5[0] = w5[0] | 0x020000; - break; - - case 83: - w5[0] = w5[0] | 0x02000000; - break; - - case 84: - w5[1] = 0x02; - break; - - case 85: - w5[1] = w5[1] | 0x0200; - break; - - case 86: - w5[1] = w5[1] | 0x020000; - break; - - case 87: - w5[1] = w5[1] | 0x02000000; - break; - - case 88: - w5[2] = 0x02; - break; - - case 89: - w5[2] = w5[2] | 0x0200; - break; - - case 90: - w5[2] = w5[2] | 0x020000; - break; - - case 91: - w5[2] = w5[2] | 0x02000000; - break; - - case 92: - w5[3] = 0x02; - break; - - case 93: - w5[3] = w5[3] | 0x0200; - break; - - case 94: - w5[3] = w5[3] | 0x020000; - break; - - case 95: - w5[3] = w5[3] | 0x02000000; - break; - - case 96: - w6[0] = 0x02; - break; - - case 97: - w6[0] = w6[0] | 0x0200; - break; - - case 98: - w6[0] = w6[0] | 0x020000; - break; - - case 99: - w6[0] = w6[0] | 0x02000000; - break; - - case 100: - w6[1] = 0x02; - break; - - case 101: - w6[1] = w6[1] | 0x0200; - break; - - case 102: - w6[1] = w6[1] | 0x020000; - break; - - case 103: - w6[1] = w6[1] | 0x02000000; - break; - - case 104: - w6[2] = 0x02; - break; - - case 105: - w6[2] = w6[2] | 0x0200; - break; - - case 106: - w6[2] = w6[2] | 0x020000; - break; - - case 107: - w6[2] = w6[2] | 0x02000000; - break; - - case 108: - w6[3] = 0x02; - break; - - case 109: - w6[3] = w6[3] | 0x0200; - break; - - case 110: - w6[3] = w6[3] | 0x020000; - break; - - case 111: - w6[3] = w6[3] | 0x02000000; - break; - - case 112: - w7[0] = 0x02; - break; - - case 113: - w7[0] = w7[0] | 0x0200; - break; - - case 114: - w7[0] = w7[0] | 0x020000; - break; - - case 115: - w7[0] = w7[0] | 0x02000000; - break; - - case 116: - w7[1] = 0x02; - break; - - case 117: - w7[1] = w7[1] | 0x0200; - break; - - case 118: - w7[1] = w7[1] | 0x020000; - break; - - case 119: - w7[1] = w7[1] | 0x02000000; - break; - - case 120: - w7[2] = 0x02; - break; - - case 121: - w7[2] = w7[2] | 0x0200; - break; - - case 122: - 
w7[2] = w7[2] | 0x020000; - break; - - case 123: - w7[2] = w7[2] | 0x02000000; - break; - - case 124: - w7[3] = 0x02; - break; - - case 125: - w7[3] = w7[3] | 0x0200; - break; - - case 126: - w7[3] = w7[3] | 0x020000; - break; - - case 127: - w7[3] = w7[3] | 0x02000000; - break; - } -} - -static void append_0x80_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } -} - -static void append_0x80_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - 
case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } -} - -static void append_0x80_2_be (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] |= 0x80000000; - break; - - case 1: - w0[0] |= 0x800000; - break; - - case 2: - w0[0] |= 0x8000; - break; - - case 3: - w0[0] |= 0x80; - break; - - case 4: - w0[1] |= 0x80000000; - break; - - case 5: - w0[1] |= 0x800000; - break; - - case 6: - w0[1] |= 0x8000; - break; - - case 7: - w0[1] |= 0x80; - break; - - case 8: - w0[2] |= 0x80000000; - break; - - case 9: - w0[2] |= 0x800000; - break; - - case 10: - w0[2] |= 0x8000; - break; - - case 11: - w0[2] |= 0x80; - break; - - case 12: - w0[3] |= 0x80000000; - break; - - case 13: - w0[3] |= 0x800000; - break; - - case 14: - w0[3] |= 0x8000; - break; - - case 15: - w0[3] |= 0x80; - break; - - case 16: - w1[0] |= 0x80000000; - break; - - case 17: - w1[0] |= 0x800000; - break; - - case 18: - w1[0] |= 0x8000; - break; - - case 19: - w1[0] |= 0x80; - break; - - case 20: - w1[1] |= 0x80000000; - break; - - case 21: - w1[1] |= 0x800000; - break; - - case 22: - w1[1] |= 0x8000; - break; - - case 23: - w1[1] |= 0x80; - break; - - case 24: - w1[2] |= 0x80000000; - break; - - case 25: - w1[2] |= 0x800000; - break; - - case 26: - w1[2] |= 0x8000; - break; - - case 27: - w1[2] |= 0x80; - break; - - case 28: - 
w1[3] |= 0x80000000; - break; - - case 29: - w1[3] |= 0x800000; - break; - - case 30: - w1[3] |= 0x8000; - break; - - case 31: - w1[3] |= 0x80; - break; - } -} - -static void append_0x80_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: 
- w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } -} - -static void append_0x80_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - 
w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } -} - -static void append_0x80_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - 
- case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; 
- break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - - case 64: - w4[0] = 0x80; - break; - - case 65: - w4[0] = w4[0] | 0x8000; - break; - - case 66: - w4[0] = w4[0] | 0x800000; - break; - - case 67: - w4[0] = w4[0] | 0x80000000; - break; - - case 68: - w4[1] = 0x80; - break; - - case 69: - w4[1] = w4[1] | 0x8000; - break; - - case 70: - w4[1] = w4[1] | 0x800000; - break; - - case 71: - w4[1] = w4[1] | 0x80000000; - break; - - case 72: - w4[2] = 0x80; - break; - - case 73: - w4[2] = w4[2] | 0x8000; - break; - - case 74: - w4[2] = w4[2] | 0x800000; - break; - - case 75: - w4[2] = w4[2] | 0x80000000; - break; - - case 76: - w4[3] = 0x80; - break; - - case 77: - w4[3] = w4[3] | 0x8000; - break; - - case 78: - w4[3] = w4[3] | 0x800000; - break; - - case 79: - w4[3] = w4[3] | 0x80000000; - break; - - case 80: - w5[0] = 0x80; - break; - - case 81: - w5[0] = w5[0] | 0x8000; - break; - - case 82: - w5[0] = w5[0] | 0x800000; - break; - - case 83: - w5[0] = w5[0] | 0x80000000; - break; - - case 84: - w5[1] = 0x80; - break; - - case 85: - w5[1] = w5[1] | 0x8000; - break; - - case 86: - w5[1] = w5[1] | 0x800000; - break; - - case 87: - w5[1] = w5[1] | 0x80000000; - break; - - case 88: - w5[2] = 0x80; - break; - - case 89: - w5[2] 
= w5[2] | 0x8000; - break; - - case 90: - w5[2] = w5[2] | 0x800000; - break; - - case 91: - w5[2] = w5[2] | 0x80000000; - break; - - case 92: - w5[3] = 0x80; - break; - - case 93: - w5[3] = w5[3] | 0x8000; - break; - - case 94: - w5[3] = w5[3] | 0x800000; - break; - - case 95: - w5[3] = w5[3] | 0x80000000; - break; - - case 96: - w6[0] = 0x80; - break; - - case 97: - w6[0] = w6[0] | 0x8000; - break; - - case 98: - w6[0] = w6[0] | 0x800000; - break; - - case 99: - w6[0] = w6[0] | 0x80000000; - break; - - case 100: - w6[1] = 0x80; - break; - - case 101: - w6[1] = w6[1] | 0x8000; - break; - - case 102: - w6[1] = w6[1] | 0x800000; - break; - - case 103: - w6[1] = w6[1] | 0x80000000; - break; - - case 104: - w6[2] = 0x80; - break; - - case 105: - w6[2] = w6[2] | 0x8000; - break; - - case 106: - w6[2] = w6[2] | 0x800000; - break; - - case 107: - w6[2] = w6[2] | 0x80000000; - break; - - case 108: - w6[3] = 0x80; - break; - - case 109: - w6[3] = w6[3] | 0x8000; - break; - - case 110: - w6[3] = w6[3] | 0x800000; - break; - - case 111: - w6[3] = w6[3] | 0x80000000; - break; - - case 112: - w7[0] = 0x80; - break; - - case 113: - w7[0] = w7[0] | 0x8000; - break; - - case 114: - w7[0] = w7[0] | 0x800000; - break; - - case 115: - w7[0] = w7[0] | 0x80000000; - break; - - case 116: - w7[1] = 0x80; - break; - - case 117: - w7[1] = w7[1] | 0x8000; - break; - - case 118: - w7[1] = w7[1] | 0x800000; - break; - - case 119: - w7[1] = w7[1] | 0x80000000; - break; - - case 120: - w7[2] = 0x80; - break; - - case 121: - w7[2] = w7[2] | 0x8000; - break; - - case 122: - w7[2] = w7[2] | 0x800000; - break; - - case 123: - w7[2] = w7[2] | 0x80000000; - break; - - case 124: - w7[3] = 0x80; - break; - - case 125: - w7[3] = w7[3] | 0x8000; - break; - - case 126: - w7[3] = w7[3] | 0x800000; - break; - - case 127: - w7[3] = w7[3] | 0x80000000; - break; - } -} - -static void append_0x80_4 (u32x w[16], const u32 offset) -{ - switch (offset) - { - case 0: - w[ 0] = 0x80; - break; - - case 1: - w[ 0] = 
w[ 0] | 0x8000; - break; - - case 2: - w[ 0] = w[ 0] | 0x800000; - break; - - case 3: - w[ 0] = w[ 0] | 0x80000000; - break; - - case 4: - w[ 1] = 0x80; - break; - - case 5: - w[ 1] = w[ 1] | 0x8000; - break; - - case 6: - w[ 1] = w[ 1] | 0x800000; - break; - - case 7: - w[ 1] = w[ 1] | 0x80000000; - break; - - case 8: - w[ 2] = 0x80; - break; - - case 9: - w[ 2] = w[ 2] | 0x8000; - break; - - case 10: - w[ 2] = w[ 2] | 0x800000; - break; - - case 11: - w[ 2] = w[ 2] | 0x80000000; - break; - - case 12: - w[ 3] = 0x80; - break; - - case 13: - w[ 3] = w[ 3] | 0x8000; - break; - - case 14: - w[ 3] = w[ 3] | 0x800000; - break; - - case 15: - w[ 3] = w[ 3] | 0x80000000; - break; - - case 16: - w[ 4] = 0x80; - break; - - case 17: - w[ 4] = w[ 4] | 0x8000; - break; - - case 18: - w[ 4] = w[ 4] | 0x800000; - break; - - case 19: - w[ 4] = w[ 4] | 0x80000000; - break; - - case 20: - w[ 5] = 0x80; - break; - - case 21: - w[ 5] = w[ 5] | 0x8000; - break; - - case 22: - w[ 5] = w[ 5] | 0x800000; - break; - - case 23: - w[ 5] = w[ 5] | 0x80000000; - break; - - case 24: - w[ 6] = 0x80; - break; - - case 25: - w[ 6] = w[ 6] | 0x8000; - break; - - case 26: - w[ 6] = w[ 6] | 0x800000; - break; - - case 27: - w[ 6] = w[ 6] | 0x80000000; - break; - - case 28: - w[ 7] = 0x80; - break; - - case 29: - w[ 7] = w[ 7] | 0x8000; - break; - - case 30: - w[ 7] = w[ 7] | 0x800000; - break; - - case 31: - w[ 7] = w[ 7] | 0x80000000; - break; - - case 32: - w[ 8] = 0x80; - break; - - case 33: - w[ 8] = w[ 8] | 0x8000; - break; - - case 34: - w[ 8] = w[ 8] | 0x800000; - break; - - case 35: - w[ 8] = w[ 8] | 0x80000000; - break; - - case 36: - w[ 9] = 0x80; - break; - - case 37: - w[ 9] = w[ 9] | 0x8000; - break; - - case 38: - w[ 9] = w[ 9] | 0x800000; - break; - - case 39: - w[ 9] = w[ 9] | 0x80000000; - break; - - case 40: - w[10] = 0x80; - break; - - case 41: - w[10] = w[10] | 0x8000; - break; - - case 42: - w[10] = w[10] | 0x800000; - break; - - case 43: - w[10] = w[10] | 0x80000000; - break; 
- - case 44: - w[11] = 0x80; - break; - - case 45: - w[11] = w[11] | 0x8000; - break; - - case 46: - w[11] = w[11] | 0x800000; - break; - - case 47: - w[11] = w[11] | 0x80000000; - break; - - case 48: - w[12] = 0x80; - break; - - case 49: - w[12] = w[12] | 0x8000; - break; - - case 50: - w[12] = w[12] | 0x800000; - break; - - case 51: - w[12] = w[12] | 0x80000000; - break; - - case 52: - w[13] = 0x80; - break; - - case 53: - w[13] = w[13] | 0x8000; - break; - - case 54: - w[13] = w[13] | 0x800000; - break; - - case 55: - w[13] = w[13] | 0x80000000; - break; - - case 56: - w[14] = 0x80; - break; - - case 57: - w[14] = w[14] | 0x8000; - break; - - case 58: - w[14] = w[14] | 0x800000; - break; - - case 59: - w[14] = w[14] | 0x80000000; - break; - - case 60: - w[15] = 0x80; - break; - - case 61: - w[15] = w[15] | 0x8000; - break; - - case 62: - w[15] = w[15] | 0x800000; - break; - - case 63: - w[15] = w[15] | 0x80000000; - break; - } -} - -static void device_memcat2L (const u32 offset, u32x dst0[2], u32x src_l0[2], u32 src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -static void device_memcat2L (const u32 offset, u32x dst0[2], u32x src_l0[2], u32x src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | 
src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -static void device_memcat4L (const u32 offset, u32x dst0[4], u32x src_l0[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = 
src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat4L (const u32 offset, u32x dst0[4], u32x src_l0[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 
14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat8L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x src_l0[4], u32x src_l1[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 
| src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = 
src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat8L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x src_l0[4], u32x src_l1[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] 
= src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = 
src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: 
- dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat12L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = 
src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = 
src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | 
src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | 
src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat12L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = 
src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | 
src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | 
src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = 
src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -static void device_memcat12L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32x src_r0[4], u32x src_r1[4]) -{ - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[2] = src_r1[1] >> 24 | 
src_r1[2] << 8; - dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[0] = src_r1[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[0] = src_r1[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[0] = src_r1[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - dst2[0] = src_r1[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[1] = src_r1[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[1] = src_r1[3] >> 16; - break; - - case 7: - dst0[1] = 
src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[1] = src_r1[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - dst2[0] = src_r1[2]; - dst2[1] = src_r1[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[2] = src_r1[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[2] = src_r1[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[2] = src_r1[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - dst2[0] = src_r1[1]; - dst2[1] 
= src_r1[2]; - dst2[2] = src_r1[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[3] = src_r1[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[3] = src_r1[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[3] = src_r1[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - dst2[0] = src_r1[0]; - dst2[1] = src_r1[1]; - dst2[2] = src_r1[2]; - dst2[3] = src_r1[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = 
src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24; - break; - - case 20: - dst1[1] = src_r1[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - dst2[1] = src_r1[0]; - dst2[2] = src_r1[1]; - dst2[3] = src_r1[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24; - break; - - case 24: - dst1[2] = src_r1[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - dst2[2] = src_r1[0]; - 
dst2[3] = src_r1[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24; - break; - - case 28: - dst1[3] = src_r1[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - dst2[3] = src_r1[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | 
src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -static void memcat16_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - 
w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] 
>> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 
16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 
8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -static void memcat16_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = 
append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | 
append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | 
append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -static void memcat32_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16; - break; - - case 3: - w0[0] = 
w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - 
break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = 
append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | 
append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 
8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; 
- w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -static void memcat32_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - 
w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 
24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 
16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - w3[0] = append2[0]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24 | append2[0] << 8; - w3[1] = append2[0] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16 | append2[0] << 16; - w3[1] = append2[0] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] 
>> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8 | append2[0] << 24; - w3[1] = append2[0] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - w3[1] = append2[0]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24 | append2[0] << 8; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16 | append2[0] << 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8 | append2[0] << 24; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = 
append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = 
append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset; - - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign ( 0, w3[1], offset_minus_4); - w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4); - w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4); - w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[0] = amd_bytealign (w0[0], 0, offset_minus_4); - - if (offset_mod_4 == 0) - { - w0[0] = w0[1]; - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 1: - w3[2] = amd_bytealign ( 0, w3[0], offset_minus_4); - w3[1] = amd_bytealign (w3[0], 
w2[3], offset_minus_4); - w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[1] = w0[2]; - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 2: - w3[2] = amd_bytealign ( 0, w2[3], offset_minus_4); - w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4); - w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[2] = w0[3]; - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - 
break; - - case 3: - w3[2] = amd_bytealign ( 0, w2[2], offset_minus_4); - w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4); - w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w0[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w0[3] = w1[0]; - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 4: - w3[2] = amd_bytealign ( 0, w2[1], offset_minus_4); - w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4); - w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[0] = w1[1]; - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 5: - w3[2] = amd_bytealign ( 0, w2[0], offset_minus_4); - w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4); - w3[0] 
= amd_bytealign (w1[3], w1[2], offset_minus_4); - w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[1] = w1[2]; - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 6: - w3[2] = amd_bytealign ( 0, w1[3], offset_minus_4); - w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4); - w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[2] = w1[3]; - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 7: - w3[2] = amd_bytealign ( 0, w1[2], offset_minus_4); - w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4); - w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w1[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; 
- w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w1[3] = w2[0]; - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 8: - w3[2] = amd_bytealign ( 0, w1[1], offset_minus_4); - w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4); - w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[0] = w2[1]; - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 9: - w3[2] = amd_bytealign ( 0, w1[0], offset_minus_4); - w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4); - w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[1] = w2[2]; - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 10: - w3[2] = amd_bytealign ( 0, w0[3], offset_minus_4); - w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4); - w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[2] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[2] = w2[3]; - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - 
w3[2] = 0; - } - - break; - - case 11: - w3[2] = amd_bytealign ( 0, w0[2], offset_minus_4); - w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4); - w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w2[3] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w2[3] = w3[0]; - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 12: - w3[2] = amd_bytealign ( 0, w0[1], offset_minus_4); - w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4); - w3[0] = amd_bytealign (w0[0], 0, offset_minus_4); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[0] = w3[1]; - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - - case 13: - w3[2] = amd_bytealign ( 0, w0[0], offset_minus_4); - w3[1] = amd_bytealign (w0[0], 0, offset_minus_4); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - if (offset_mod_4 == 0) - { - w3[1] = w3[2]; - w3[2] = 0; - } - - break; - } -} - -static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset / 4) - { - case 0: - w3[2] = amd_bytealign (w3[1], 0, offset); - w3[1] = amd_bytealign (w3[0], w3[1], offset); - w3[0] = amd_bytealign (w2[3], w3[0], offset); - w2[3] = amd_bytealign (w2[2], w2[3], offset); - w2[2] = amd_bytealign (w2[1], w2[2], offset); - w2[1] = amd_bytealign (w2[0], w2[1], offset); - w2[0] = amd_bytealign (w1[3], w2[0], offset); - w1[3] = amd_bytealign (w1[2], w1[3], offset); - w1[2] = amd_bytealign (w1[1], w1[2], offset); - w1[1] = amd_bytealign (w1[0], w1[1], offset); - w1[0] = amd_bytealign (w0[3], w1[0], offset); - w0[3] = amd_bytealign 
(w0[2], w0[3], offset); - w0[2] = amd_bytealign (w0[1], w0[2], offset); - w0[1] = amd_bytealign (w0[0], w0[1], offset); - w0[0] = amd_bytealign ( 0, w0[0], offset); - break; - - case 1: - w3[2] = amd_bytealign (w3[0], 0, offset); - w3[1] = amd_bytealign (w2[3], w3[0], offset); - w3[0] = amd_bytealign (w2[2], w2[3], offset); - w2[3] = amd_bytealign (w2[1], w2[2], offset); - w2[2] = amd_bytealign (w2[0], w2[1], offset); - w2[1] = amd_bytealign (w1[3], w2[0], offset); - w2[0] = amd_bytealign (w1[2], w1[3], offset); - w1[3] = amd_bytealign (w1[1], w1[2], offset); - w1[2] = amd_bytealign (w1[0], w1[1], offset); - w1[1] = amd_bytealign (w0[3], w1[0], offset); - w1[0] = amd_bytealign (w0[2], w0[3], offset); - w0[3] = amd_bytealign (w0[1], w0[2], offset); - w0[2] = amd_bytealign (w0[0], w0[1], offset); - w0[1] = amd_bytealign ( 0, w0[0], offset); - w0[0] = 0; - break; - - case 2: - w3[2] = amd_bytealign (w2[3], 0, offset); - w3[1] = amd_bytealign (w2[2], w2[3], offset); - w3[0] = amd_bytealign (w2[1], w2[2], offset); - w2[3] = amd_bytealign (w2[0], w2[1], offset); - w2[2] = amd_bytealign (w1[3], w2[0], offset); - w2[1] = amd_bytealign (w1[2], w1[3], offset); - w2[0] = amd_bytealign (w1[1], w1[2], offset); - w1[3] = amd_bytealign (w1[0], w1[1], offset); - w1[2] = amd_bytealign (w0[3], w1[0], offset); - w1[1] = amd_bytealign (w0[2], w0[3], offset); - w1[0] = amd_bytealign (w0[1], w0[2], offset); - w0[3] = amd_bytealign (w0[0], w0[1], offset); - w0[2] = amd_bytealign ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w3[2] = amd_bytealign (w2[2], 0, offset); - w3[1] = amd_bytealign (w2[1], w2[2], offset); - w3[0] = amd_bytealign (w2[0], w2[1], offset); - w2[3] = amd_bytealign (w1[3], w2[0], offset); - w2[2] = amd_bytealign (w1[2], w1[3], offset); - w2[1] = amd_bytealign (w1[1], w1[2], offset); - w2[0] = amd_bytealign (w1[0], w1[1], offset); - w1[3] = amd_bytealign (w0[3], w1[0], offset); - w1[2] = amd_bytealign (w0[2], w0[3], offset); - w1[1] = 
amd_bytealign (w0[1], w0[2], offset); - w1[0] = amd_bytealign (w0[0], w0[1], offset); - w0[3] = amd_bytealign ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w3[2] = amd_bytealign (w2[1], 0, offset); - w3[1] = amd_bytealign (w2[0], w2[1], offset); - w3[0] = amd_bytealign (w1[3], w2[0], offset); - w2[3] = amd_bytealign (w1[2], w1[3], offset); - w2[2] = amd_bytealign (w1[1], w1[2], offset); - w2[1] = amd_bytealign (w1[0], w1[1], offset); - w2[0] = amd_bytealign (w0[3], w1[0], offset); - w1[3] = amd_bytealign (w0[2], w0[3], offset); - w1[2] = amd_bytealign (w0[1], w0[2], offset); - w1[1] = amd_bytealign (w0[0], w0[1], offset); - w1[0] = amd_bytealign ( 0, w0[0], offset); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w3[2] = amd_bytealign (w2[0], 0, offset); - w3[1] = amd_bytealign (w1[3], w2[0], offset); - w3[0] = amd_bytealign (w1[2], w1[3], offset); - w2[3] = amd_bytealign (w1[1], w1[2], offset); - w2[2] = amd_bytealign (w1[0], w1[1], offset); - w2[1] = amd_bytealign (w0[3], w1[0], offset); - w2[0] = amd_bytealign (w0[2], w0[3], offset); - w1[3] = amd_bytealign (w0[1], w0[2], offset); - w1[2] = amd_bytealign (w0[0], w0[1], offset); - w1[1] = amd_bytealign ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w3[2] = amd_bytealign (w1[3], 0, offset); - w3[1] = amd_bytealign (w1[2], w1[3], offset); - w3[0] = amd_bytealign (w1[1], w1[2], offset); - w2[3] = amd_bytealign (w1[0], w1[1], offset); - w2[2] = amd_bytealign (w0[3], w1[0], offset); - w2[1] = amd_bytealign (w0[2], w0[3], offset); - w2[0] = amd_bytealign (w0[1], w0[2], offset); - w1[3] = amd_bytealign (w0[0], w0[1], offset); - w1[2] = amd_bytealign ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w3[2] = amd_bytealign (w1[2], 0, offset); - w3[1] = amd_bytealign (w1[1], w1[2], offset); - w3[0] = amd_bytealign (w1[0], w1[1], 
offset); - w2[3] = amd_bytealign (w0[3], w1[0], offset); - w2[2] = amd_bytealign (w0[2], w0[3], offset); - w2[1] = amd_bytealign (w0[1], w0[2], offset); - w2[0] = amd_bytealign (w0[0], w0[1], offset); - w1[3] = amd_bytealign ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w3[2] = amd_bytealign (w1[1], 0, offset); - w3[1] = amd_bytealign (w1[0], w1[1], offset); - w3[0] = amd_bytealign (w0[3], w1[0], offset); - w2[3] = amd_bytealign (w0[2], w0[3], offset); - w2[2] = amd_bytealign (w0[1], w0[2], offset); - w2[1] = amd_bytealign (w0[0], w0[1], offset); - w2[0] = amd_bytealign ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w3[2] = amd_bytealign (w1[0], 0, offset); - w3[1] = amd_bytealign (w0[3], w1[0], offset); - w3[0] = amd_bytealign (w0[2], w0[3], offset); - w2[3] = amd_bytealign (w0[1], w0[2], offset); - w2[2] = amd_bytealign (w0[0], w0[1], offset); - w2[1] = amd_bytealign ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w3[2] = amd_bytealign (w0[3], 0, offset); - w3[1] = amd_bytealign (w0[2], w0[3], offset); - w3[0] = amd_bytealign (w0[1], w0[2], offset); - w2[3] = amd_bytealign (w0[0], w0[1], offset); - w2[2] = amd_bytealign ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w3[2] = amd_bytealign (w0[2], 0, offset); - w3[1] = amd_bytealign (w0[1], w0[2], offset); - w3[0] = amd_bytealign (w0[0], w0[1], offset); - w2[3] = amd_bytealign ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w3[2] = amd_bytealign (w0[1], 
0, offset); - w3[1] = amd_bytealign (w0[0], w0[1], offset); - w3[0] = amd_bytealign ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w3[2] = amd_bytealign (w0[0], 0, offset); - w3[1] = amd_bytealign ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } -} -#endif - -static u32 check_vector_accessible (const u32 il_pos, const u32 bf_loops, const u32 bfs_cnt, const u32 element) -{ - #ifdef VECT_SIZE1 - - // nothing to do here - - #else - - if ((il_pos + 1) == bf_loops) - { - #ifdef VECT_SIZE2 - u32 bfs_over = bfs_cnt % 2; - - if (bfs_over == 0) bfs_over = 2; - #endif - - #ifdef VECT_SIZE4 - u32 bfs_over = bfs_cnt % 4; - - if (bfs_over == 0) bfs_over = 4; - #endif - - if (element >= bfs_over) return 0; - } - - #endif - - return 1; -} diff --git a/amd/markov_be_v2.cl b/amd/markov_be_v2.cl deleted file mode 100644 index 11b2db4..0000000 --- a/amd/markov_be_v2.cl +++ /dev/null @@ -1,178 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#include "include/kernel_vendor.h" - -#define CHARSIZ 256 - -#define VECT_SIZE2 - -#include "types_amd.c" - -static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - __global cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { 
- const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << ((3 - jm4) * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << ((3 - jm4) * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 1); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].s0 = pw_buf0[i]; - pws_buf_l[gid].i[i].s1 = pw_buf1[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 0); - - pws_buf_r[gid2 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 1); - - 
pws_buf_r[gid2 + 1].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 2); - - pws_buf_r[gid2 + 2].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 3); - - pws_buf_r[gid2 + 3].i = pw_buf[0]; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 0); - - pws_buf[gid2 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 1); - - pws_buf[gid2 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 1].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 2); - - pws_buf[gid2 + 2].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 2].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 2].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 2].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 2].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 2].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 
2].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 2].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 2].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 3); - - pws_buf[gid2 + 3].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 3].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 3].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 3].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 3].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 3].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 3].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 3].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 3].pw_len = pw_len; -} diff --git a/amd/markov_be_v4.cl b/amd/markov_be_v4.cl deleted file mode 100644 index a5d4a6b..0000000 --- a/amd/markov_be_v4.cl +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#include "include/kernel_vendor.h" - -#define CHARSIZ 256 - -#define VECT_SIZE4 - -#include "types_amd.c" - -static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - __global cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << ((3 - jm4) * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << ((3 - jm4) * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) 
pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - u32 pw_buf2[16]; - u32 pw_buf3[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 1); - generate_pw (pw_buf2, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 2); - generate_pw (pw_buf3, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 3); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].s0 = pw_buf0[i]; - pws_buf_l[gid].i[i].s1 = pw_buf1[i]; - pws_buf_l[gid].i[i].s2 = pw_buf2[i]; - pws_buf_l[gid].i[i].s3 = pw_buf3[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 0); - - pws_buf_r[gid4 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 1); - - pws_buf_r[gid4 + 1].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 2); - - 
pws_buf_r[gid4 + 2].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 3); - - pws_buf_r[gid4 + 3].i = pw_buf[0]; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 0); - - pws_buf[gid4 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 1); - - pws_buf[gid4 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 1].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 2); - - pws_buf[gid4 + 2].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 2].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 2].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 2].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 2].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 2].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 2].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 2].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 2].pw_len = pw_len; - - generate_pw (pw_buf, 
root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 3); - - pws_buf[gid4 + 3].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 3].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 3].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 3].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 3].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 3].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 3].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 3].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 3].pw_len = pw_len; -} diff --git a/amd/markov_le_v2.cl b/amd/markov_le_v2.cl deleted file mode 100644 index b341877..0000000 --- a/amd/markov_le_v2.cl +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#include "include/kernel_vendor.h" - -#define CHARSIZ 256 - -#define VECT_SIZE2 - -#include "types_amd.c" - -static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - __global cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << (jm4 * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t 
*pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 1); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].s0 = pw_buf0[i]; - pws_buf_l[gid].i[i].s1 = pw_buf1[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 0); - - pws_buf_r[gid2 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 1); - - pws_buf_r[gid2 + 1].i = pw_buf[0]; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 0); - - pws_buf[gid2 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 0].i[ 1] = 
pw_buf[ 1]; - pws_buf[gid2 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 1); - - pws_buf[gid2 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 1].pw_len = pw_len; -} diff --git a/amd/markov_le_v4.cl b/amd/markov_le_v4.cl deleted file mode 100644 index e7b9b06..0000000 --- a/amd/markov_le_v4.cl +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#include "include/kernel_vendor.h" - -#define CHARSIZ 256 - -#define VECT_SIZE4 - -#include "types_amd.c" - -static void generate_pw (u32 pw_buf[16], __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - __global cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << (jm4 * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; 
- } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) l_markov (__global pw_t *pws_buf_l, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - u32 pw_buf2[16]; - u32 pw_buf3[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 1); - generate_pw (pw_buf2, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 2); - generate_pw (pw_buf3, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 3); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].s0 = pw_buf0[i]; - pws_buf_l[gid].i[i].s1 = pw_buf1[i]; - pws_buf_l[gid].i[i].s2 = pw_buf2[i]; - pws_buf_l[gid].i[i].s3 = pw_buf3[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) r_markov (__global bf_t *pws_buf_r, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 0); - - pws_buf_r[gid4 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, 
pw_r_len, 0, 0, 0, 0, off + gid4 + 1); - - pws_buf_r[gid4 + 1].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 2); - - pws_buf_r[gid4 + 2].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 3); - - pws_buf_r[gid4 + 3].i = pw_buf[0]; -} - -__kernel void __attribute__((reqd_work_group_size (64, 1, 1))) C_markov (__global comb_t *pws_buf, __global cs_t *root_css_buf, __global cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = get_global_id (0); - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 0); - - pws_buf[gid4 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 1); - - pws_buf[gid4 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 1].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 2); - - pws_buf[gid4 + 2].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 2].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 2].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 2].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 2].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 
2].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 2].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 2].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 2].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 3); - - pws_buf[gid4 + 3].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 3].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 3].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 3].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 3].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 3].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 3].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 3].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 3].pw_len = pw_len; -} diff --git a/amd/rp_amd.c b/amd/rp_amd.c deleted file mode 100644 index 5b41e87..0000000 --- a/amd/rp_amd.c +++ /dev/null @@ -1,2838 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -static u32x generate_cmask (u32x buf) -{ - const u32x rmask = ((buf & 0x40404040) >> 1) - & ~((buf & 0x80808080) >> 2); - - const u32x hmask = (buf & 0x1f1f1f1f) + 0x05050505; - const u32x lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f; - - return rmask & ~hmask & lmask; -} - -static void truncate_right (u32x w0[4], u32x w1[4], const u32 len) -{ - const u32 tmp = (1 << ((len % 4) * 8)) - 1; - - switch (len / 4) - { - case 0: w0[0] &= tmp; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - break; - case 1: w0[1] &= tmp; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - break; - case 2: w0[2] &= tmp; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - break; - case 3: w0[3] &= tmp; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - break; - case 4: w1[0] &= tmp; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - break; - case 5: w1[1] &= tmp; - w1[2] = 0; - w1[3] = 0; - break; - case 6: w1[2] &= tmp; - w1[3] = 0; - break; - case 7: w1[3] &= tmp; - break; - } -} - -static void truncate_left (u32x w0[4], u32x w1[4], const u32 len) -{ - const u32 tmp = ~((1 << ((len % 4) * 8)) - 1); - - switch 
(len / 4) - { - case 0: w0[0] &= tmp; - break; - case 1: w0[0] = 0; - w0[1] &= tmp; - break; - case 2: w0[0] = 0; - w0[1] = 0; - w0[2] &= tmp; - break; - case 3: w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] &= tmp; - break; - case 4: w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] &= tmp; - break; - case 5: w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] &= tmp; - break; - case 6: w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] &= tmp; - break; - case 7: w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] &= tmp; - break; - } -} - -static void lshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4]) -{ - /* cuda - out0[0] = in0[0] >> 8 | in0[1] << 24; - out0[1] = in0[1] >> 8 | in0[2] << 24; - out0[2] = in0[2] >> 8 | in0[3] << 24; - out0[3] = in0[3] >> 8 | in1[0] << 24; - out1[0] = in1[0] >> 8 | in1[1] << 24; - out1[1] = in1[1] >> 8 | in1[2] << 24; - out1[2] = in1[2] >> 8 | in1[3] << 24; - out1[3] = in1[3] >> 8; - */ - - out0[0] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[3], in1[2], 1); - out1[3] = amd_bytealign ( 0, in1[3], 1); -} - -static void rshift_block (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4]) -{ - /* cuda - out1[3] = in1[3] << 8 | in1[2] >> 24; - out1[2] = in1[2] << 8 | in1[1] >> 24; - out1[1] = in1[1] << 8 | in1[0] >> 24; - out1[0] = in1[0] << 8 | in0[3] >> 24; - out0[3] = in0[3] << 8 | in0[2] >> 24; - out0[2] = in0[2] << 8 | in0[1] >> 24; - out0[1] = in0[1] << 8 | in0[0] >> 24; - out0[0] = in0[0] << 8; - */ - - out1[3] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[1], 
in1[0], 3); - out1[0] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[1], in0[0], 3); - out0[0] = amd_bytealign (in0[0], 0, 3); -} - -static void lshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num) -{ - switch (num) - { - case 0: out0[0] = in0[0]; - out0[1] = in0[1]; - out0[2] = in0[2]; - out0[3] = in0[3]; - out1[0] = in1[0]; - out1[1] = in1[1]; - out1[2] = in1[2]; - out1[3] = in1[3]; - break; - case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[3], in1[2], 1); - out1[3] = amd_bytealign ( 0, in1[3], 1); - break; - case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2); - out0[1] = amd_bytealign (in0[2], in0[1], 2); - out0[2] = amd_bytealign (in0[3], in0[2], 2); - out0[3] = amd_bytealign (in1[0], in0[3], 2); - out1[0] = amd_bytealign (in1[1], in1[0], 2); - out1[1] = amd_bytealign (in1[2], in1[1], 2); - out1[2] = amd_bytealign (in1[3], in1[2], 2); - out1[3] = amd_bytealign ( 0, in1[3], 2); - break; - case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3); - out0[1] = amd_bytealign (in0[2], in0[1], 3); - out0[2] = amd_bytealign (in0[3], in0[2], 3); - out0[3] = amd_bytealign (in1[0], in0[3], 3); - out1[0] = amd_bytealign (in1[1], in1[0], 3); - out1[1] = amd_bytealign (in1[2], in1[1], 3); - out1[2] = amd_bytealign (in1[3], in1[2], 3); - out1[3] = amd_bytealign ( 0, in1[3], 3); - break; - case 4: out0[0] = in0[1]; - out0[1] = in0[2]; - out0[2] = in0[3]; - out0[3] = in1[0]; - out1[0] = in1[1]; - out1[1] = in1[2]; - out1[2] = in1[3]; - out1[3] = 0; - break; - case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1); - out0[1] = amd_bytealign (in0[3], in0[2], 1); - 
out0[2] = amd_bytealign (in1[0], in0[3], 1); - out0[3] = amd_bytealign (in1[1], in1[0], 1); - out1[0] = amd_bytealign (in1[2], in1[1], 1); - out1[1] = amd_bytealign (in1[3], in1[2], 1); - out1[2] = amd_bytealign ( 0, in1[3], 1); - out1[3] = 0; - break; - case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2); - out0[1] = amd_bytealign (in0[3], in0[2], 2); - out0[2] = amd_bytealign (in1[0], in0[3], 2); - out0[3] = amd_bytealign (in1[1], in1[0], 2); - out1[0] = amd_bytealign (in1[2], in1[1], 2); - out1[1] = amd_bytealign (in1[3], in1[2], 2); - out1[2] = amd_bytealign ( 0, in1[3], 2); - out1[3] = 0; - break; - case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in1[1], in1[0], 3); - out1[0] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign ( 0, in1[3], 3); - out1[3] = 0; - break; - case 8: out0[0] = in0[2]; - out0[1] = in0[3]; - out0[2] = in1[0]; - out0[3] = in1[1]; - out1[0] = in1[2]; - out1[1] = in1[3]; - out1[2] = 0; - out1[3] = 0; - break; - case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1); - out0[1] = amd_bytealign (in1[0], in0[3], 1); - out0[2] = amd_bytealign (in1[1], in1[0], 1); - out0[3] = amd_bytealign (in1[2], in1[1], 1); - out1[0] = amd_bytealign (in1[3], in1[2], 1); - out1[1] = amd_bytealign ( 0, in1[3], 1); - out1[2] = 0; - out1[3] = 0; - break; - case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2); - out0[1] = amd_bytealign (in1[0], in0[3], 2); - out0[2] = amd_bytealign (in1[1], in1[0], 2); - out0[3] = amd_bytealign (in1[2], in1[1], 2); - out1[0] = amd_bytealign (in1[3], in1[2], 2); - out1[1] = amd_bytealign ( 0, in1[3], 2); - out1[2] = 0; - out1[3] = 0; - break; - case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3); - out0[1] = amd_bytealign (in1[0], in0[3], 3); - out0[2] = amd_bytealign (in1[1], in1[0], 3); - out0[3] = amd_bytealign (in1[2], in1[1], 3); - out1[0] = 
amd_bytealign (in1[3], in1[2], 3); - out1[1] = amd_bytealign ( 0, in1[3], 3); - out1[2] = 0; - out1[3] = 0; - break; - case 12: out0[0] = in0[3]; - out0[1] = in1[0]; - out0[2] = in1[1]; - out0[3] = in1[2]; - out1[0] = in1[3]; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1); - out0[1] = amd_bytealign (in1[1], in1[0], 1); - out0[2] = amd_bytealign (in1[2], in1[1], 1); - out0[3] = amd_bytealign (in1[3], in1[2], 1); - out1[0] = amd_bytealign ( 0, in1[3], 1); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2); - out0[1] = amd_bytealign (in1[1], in1[0], 2); - out0[2] = amd_bytealign (in1[2], in1[1], 2); - out0[3] = amd_bytealign (in1[3], in1[2], 2); - out1[0] = amd_bytealign ( 0, in1[3], 2); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3); - out0[1] = amd_bytealign (in1[1], in1[0], 3); - out0[2] = amd_bytealign (in1[2], in1[1], 3); - out0[3] = amd_bytealign (in1[3], in1[2], 3); - out1[0] = amd_bytealign ( 0, in1[3], 3); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 16: out0[0] = in1[0]; - out0[1] = in1[1]; - out0[2] = in1[2]; - out0[3] = in1[3]; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1); - out0[1] = amd_bytealign (in1[2], in1[1], 1); - out0[2] = amd_bytealign (in1[3], in1[2], 1); - out0[3] = amd_bytealign ( 0, in1[3], 1); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2); - out0[1] = amd_bytealign (in1[2], in1[1], 2); - out0[2] = amd_bytealign (in1[3], in1[2], 2); - out0[3] = amd_bytealign ( 0, in1[3], 2); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3); - out0[1] = amd_bytealign (in1[2], in1[1], 3); - out0[2] = amd_bytealign (in1[3], in1[2], 3); - 
out0[3] = amd_bytealign ( 0, in1[3], 3); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 20: out0[0] = in1[1]; - out0[1] = in1[2]; - out0[2] = in1[3]; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1); - out0[1] = amd_bytealign (in1[3], in1[2], 1); - out0[2] = amd_bytealign ( 0, in1[3], 1); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2); - out0[1] = amd_bytealign (in1[3], in1[2], 2); - out0[2] = amd_bytealign ( 0, in1[3], 2); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3); - out0[1] = amd_bytealign (in1[3], in1[2], 3); - out0[2] = amd_bytealign ( 0, in1[3], 3); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 24: out0[0] = in1[2]; - out0[1] = in1[3]; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1); - out0[1] = amd_bytealign ( 0, in1[3], 1); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2); - out0[1] = amd_bytealign ( 0, in1[3], 2); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3); - out0[1] = amd_bytealign ( 0, in1[3], 3); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 28: out0[0] = in1[3]; - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 29: out0[0] = amd_bytealign ( 0, in1[3], 1); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - 
break; - case 30: out0[0] = amd_bytealign ( 0, in1[3], 2); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 31: out0[0] = amd_bytealign ( 0, in1[3], 3); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - } -} - -static void rshift_block_N (const u32x in0[4], const u32x in1[4], u32x out0[4], u32x out1[4], const u32 num) -{ - switch (num) - { - case 0: out1[3] = in1[3]; - out1[2] = in1[2]; - out1[1] = in1[1]; - out1[0] = in1[0]; - out0[3] = in0[3]; - out0[2] = in0[2]; - out0[1] = in0[1]; - out0[0] = in0[0]; - break; - case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[1], in1[0], 3); - out1[0] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[1], in0[0], 3); - out0[0] = amd_bytealign (in0[0], 0, 3); - break; - case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2); - out1[2] = amd_bytealign (in1[2], in1[1], 2); - out1[1] = amd_bytealign (in1[1], in1[0], 2); - out1[0] = amd_bytealign (in1[0], in0[3], 2); - out0[3] = amd_bytealign (in0[3], in0[2], 2); - out0[2] = amd_bytealign (in0[2], in0[1], 2); - out0[1] = amd_bytealign (in0[1], in0[0], 2); - out0[0] = amd_bytealign (in0[0], 0, 2); - break; - case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1); - out1[2] = amd_bytealign (in1[2], in1[1], 1); - out1[1] = amd_bytealign (in1[1], in1[0], 1); - out1[0] = amd_bytealign (in1[0], in0[3], 1); - out0[3] = amd_bytealign (in0[3], in0[2], 1); - out0[2] = amd_bytealign (in0[2], in0[1], 1); - out0[1] = amd_bytealign (in0[1], in0[0], 1); - out0[0] = amd_bytealign (in0[0], 0, 1); - break; - case 4: out1[3] = in1[2]; - out1[2] = in1[1]; - out1[1] = in1[0]; - out1[0] = in0[3]; - out0[3] = in0[2]; - out0[2] = in0[1]; - out0[1] = in0[0]; - out0[0] = 0; - break; - case 
5: out1[3] = amd_bytealign (in1[2], in1[1], 3); - out1[2] = amd_bytealign (in1[1], in1[0], 3); - out1[1] = amd_bytealign (in1[0], in0[3], 3); - out1[0] = amd_bytealign (in0[3], in0[2], 3); - out0[3] = amd_bytealign (in0[2], in0[1], 3); - out0[2] = amd_bytealign (in0[1], in0[0], 3); - out0[1] = amd_bytealign (in0[0], 0, 3); - out0[0] = 0; - break; - case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2); - out1[2] = amd_bytealign (in1[1], in1[0], 2); - out1[1] = amd_bytealign (in1[0], in0[3], 2); - out1[0] = amd_bytealign (in0[3], in0[2], 2); - out0[3] = amd_bytealign (in0[2], in0[1], 2); - out0[2] = amd_bytealign (in0[1], in0[0], 2); - out0[1] = amd_bytealign (in0[0], 0, 2); - out0[0] = 0; - break; - case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[0], 0, 1); - out0[0] = 0; - break; - case 8: out1[3] = in1[1]; - out1[2] = in1[0]; - out1[1] = in0[3]; - out1[0] = in0[2]; - out0[3] = in0[1]; - out0[2] = in0[0]; - out0[1] = 0; - out0[0] = 0; - break; - case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3); - out1[2] = amd_bytealign (in1[0], in0[3], 3); - out1[1] = amd_bytealign (in0[3], in0[2], 3); - out1[0] = amd_bytealign (in0[2], in0[1], 3); - out0[3] = amd_bytealign (in0[1], in0[0], 3); - out0[2] = amd_bytealign (in0[0], 0, 3); - out0[1] = 0; - out0[0] = 0; - break; - case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2); - out1[2] = amd_bytealign (in1[0], in0[3], 2); - out1[1] = amd_bytealign (in0[3], in0[2], 2); - out1[0] = amd_bytealign (in0[2], in0[1], 2); - out0[3] = amd_bytealign (in0[1], in0[0], 2); - out0[2] = amd_bytealign (in0[0], 0, 2); - out0[1] = 0; - out0[0] = 0; - break; - case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1); - out1[2] = amd_bytealign (in1[0], in0[3], 1); - out1[1] = 
amd_bytealign (in0[3], in0[2], 1); - out1[0] = amd_bytealign (in0[2], in0[1], 1); - out0[3] = amd_bytealign (in0[1], in0[0], 1); - out0[2] = amd_bytealign (in0[0], 0, 1); - out0[1] = 0; - out0[0] = 0; - break; - case 12: out1[3] = in1[0]; - out1[2] = in0[3]; - out1[1] = in0[2]; - out1[0] = in0[1]; - out0[3] = in0[0]; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3); - out1[2] = amd_bytealign (in0[3], in0[2], 3); - out1[1] = amd_bytealign (in0[2], in0[1], 3); - out1[0] = amd_bytealign (in0[1], in0[0], 3); - out0[3] = amd_bytealign (in0[0], 0, 3); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2); - out1[2] = amd_bytealign (in0[3], in0[2], 2); - out1[1] = amd_bytealign (in0[2], in0[1], 2); - out1[0] = amd_bytealign (in0[1], in0[0], 2); - out0[3] = amd_bytealign (in0[0], 0, 2); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1); - out1[2] = amd_bytealign (in0[3], in0[2], 1); - out1[1] = amd_bytealign (in0[2], in0[1], 1); - out1[0] = amd_bytealign (in0[1], in0[0], 1); - out0[3] = amd_bytealign (in0[0], 0, 1); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 16: out1[3] = in0[3]; - out1[2] = in0[2]; - out1[1] = in0[1]; - out1[0] = in0[0]; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3); - out1[2] = amd_bytealign (in0[2], in0[1], 3); - out1[1] = amd_bytealign (in0[1], in0[0], 3); - out1[0] = amd_bytealign (in0[0], 0, 3); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2); - out1[2] = amd_bytealign (in0[2], in0[1], 2); - out1[1] = amd_bytealign (in0[1], in0[0], 2); - out1[0] = amd_bytealign (in0[0], 0, 2); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1); - out1[2] = 
amd_bytealign (in0[2], in0[1], 1); - out1[1] = amd_bytealign (in0[1], in0[0], 1); - out1[0] = amd_bytealign (in0[0], 0, 1); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 20: out1[3] = in0[2]; - out1[2] = in0[1]; - out1[1] = in0[0]; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3); - out1[2] = amd_bytealign (in0[1], in0[0], 3); - out1[1] = amd_bytealign (in0[0], 0, 3); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2); - out1[2] = amd_bytealign (in0[1], in0[0], 2); - out1[1] = amd_bytealign (in0[0], 0, 2); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1); - out1[2] = amd_bytealign (in0[1], in0[0], 1); - out1[1] = amd_bytealign (in0[0], 0, 1); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 24: out1[3] = in0[1]; - out1[2] = in0[0]; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3); - out1[2] = amd_bytealign (in0[0], 0, 3); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2); - out1[2] = amd_bytealign (in0[0], 0, 2); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1); - out1[2] = amd_bytealign (in0[0], 0, 1); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 28: out1[3] = in0[0]; - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 29: out1[3] = amd_bytealign (in0[0], 0, 3); - out1[2] = 0; - out1[1] = 0; - 
out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 30: out1[3] = amd_bytealign (in0[0], 0, 2); - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 31: out1[3] = amd_bytealign (in0[0], 0, 1); - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - } -} - -static void append_block1 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_r0) -{ - u32x tmp[2]; - - switch (offset & 3) - { - case 0: tmp[0] = src_r0; - tmp[1] = 0; - break; - case 1: tmp[0] = src_r0 << 8; - tmp[1] = src_r0 >> 24; - break; - case 2: tmp[0] = src_r0 << 16; - tmp[1] = src_r0 >> 16; - break; - case 3: tmp[0] = src_r0 << 24; - tmp[1] = src_r0 >> 8; - break; - } - - switch (offset / 4) - { - case 0: dst0[0] |= tmp[0]; - dst0[1] = tmp[1]; - break; - case 1: dst0[1] |= tmp[0]; - dst0[2] = tmp[1]; - break; - case 2: dst0[2] |= tmp[0]; - dst0[3] = tmp[1]; - break; - case 3: dst0[3] |= tmp[0]; - dst1[0] = tmp[1]; - break; - case 4: dst1[0] |= tmp[0]; - dst1[1] = tmp[1]; - break; - case 5: dst1[1] |= tmp[0]; - dst1[2] = tmp[1]; - break; - case 6: dst1[2] |= tmp[0]; - dst1[3] = tmp[1]; - break; - case 7: dst1[3] |= tmp[0]; - break; - } -} - -static void append_block8 (const u32 offset, u32x dst0[4], u32x dst1[4], const u32x src_l0[4], const u32x src_l1[4], const u32x src_r0[4], const u32x src_r1[4]) -{ - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = src_l0[0] - | src_r0[0] << 8; - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 
3); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3); - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3); - break; - - case 2: - dst0[0] = src_l0[0] - | src_r0[0] << 16; - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2); - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2); - break; - - case 3: - dst0[0] = src_l0[0] - | src_r0[0] << 24; - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1); - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1); - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - break; - - case 5: - dst0[1] = src_l0[1] - | src_r0[0] << 8; - dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3); - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3); - break; - - case 6: - dst0[1] = src_l0[1] - | src_r0[0] << 16; - dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2); - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2); - break; - - case 7: - dst0[1] = src_l0[1] - | src_r0[0] << 24; - dst0[2] 
= amd_bytealign (src_r0[1], src_r0[0], 1); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1); - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1); - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - break; - - case 9: - dst0[2] = src_l0[2] - | src_r0[0] << 8; - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3); - break; - - case 10: - dst0[2] = src_l0[2] - | src_r0[0] << 16; - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2); - break; - - case 11: - dst0[2] = src_l0[2] - | src_r0[0] << 24; - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1); - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - break; - - case 13: - dst0[3] = src_l0[3] - | src_r0[0] << 8; - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3); - break; - - case 14: - dst0[3] = src_l0[3] - | src_r0[0] << 16; - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst1[1] = 
amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2); - break; - - case 15: - dst0[3] = src_l0[3] - | src_r0[0] << 24; - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1); - dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1); - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] - | src_r0[0] << 8; - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3); - break; - - case 18: - dst1[0] = src_l1[0] - | src_r0[0] << 16; - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2); - break; - - case 19: - dst1[0] = src_l1[0] - | src_r0[0] << 24; - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1); - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] - | src_r0[0] << 8; - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3); - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3); - break; - - case 22: - dst1[1] = src_l1[1] - | src_r0[0] << 16; - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2); - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2); - break; - - case 23: - dst1[1] = src_l1[1] - | src_r0[0] << 24; - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1); - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1); - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] - | src_r0[0] << 8; - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3); - 
break; - - case 26: - dst1[2] = src_l1[2] - | src_r0[0] << 16; - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2); - break; - - case 27: - dst1[2] = src_l1[2] - | src_r0[0] << 24; - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1); - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] - | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] - | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] - | src_r0[0] << 24; - break; - } -} - -static void reverse_block (u32x in0[4], u32x in1[4], u32x out0[4], u32x out1[4], const u32 len) -{ - rshift_block_N (in0, in1, out0, out1, 32 - len); - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = out1[3]; - tib40[1] = out1[2]; - tib40[2] = out1[1]; - tib40[3] = out1[0]; - tib41[0] = out0[3]; - tib41[1] = out0[2]; - tib41[2] = out0[1]; - tib41[3] = out0[0]; - - out0[0] = swap_workaround (tib40[0]); - out0[1] = swap_workaround (tib40[1]); - out0[2] = swap_workaround (tib40[2]); - out0[3] = swap_workaround (tib40[3]); - out1[0] = swap_workaround (tib41[0]); - out1[1] = swap_workaround (tib41[1]); - out1[2] = swap_workaround (tib41[2]); - out1[3] = swap_workaround (tib41[3]); -} - -static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - buf0[0] |= (generate_cmask (buf0[0])); - buf0[1] |= (generate_cmask (buf0[1])); - buf0[2] |= (generate_cmask (buf0[2])); - buf0[3] |= (generate_cmask (buf0[3])); - buf1[0] |= (generate_cmask (buf1[0])); - buf1[1] |= (generate_cmask (buf1[1])); - buf1[2] |= (generate_cmask (buf1[2])); - buf1[3] |= (generate_cmask (buf1[3])); - - return in_len; -} - -static u32 rule_op_mangle_urest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - buf0[0] &= ~(generate_cmask (buf0[0])); - buf0[1] &= ~(generate_cmask (buf0[1])); - buf0[2] &= ~(generate_cmask (buf0[2])); - buf0[3] &= ~(generate_cmask (buf0[3])); - buf1[0] &= ~(generate_cmask (buf1[0])); - buf1[1] &= ~(generate_cmask 
(buf1[1])); - buf1[2] &= ~(generate_cmask (buf1[2])); - buf1[3] &= ~(generate_cmask (buf1[3])); - - return in_len; -} - -static u32 rule_op_mangle_lrest_ufirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - rule_op_mangle_lrest (p0, p1, buf0, buf1, in_len); - - buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0])); - - return in_len; -} - -static u32 rule_op_mangle_urest_lfirst (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - rule_op_mangle_urest (p0, p1, buf0, buf1, in_len); - - buf0[0] |= (0x00000020 & generate_cmask (buf0[0])); - - return in_len; -} - -static u32 rule_op_mangle_trest (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - buf0[0] ^= (generate_cmask (buf0[0])); - buf0[1] ^= (generate_cmask (buf0[1])); - buf0[2] ^= (generate_cmask (buf0[2])); - buf0[3] ^= (generate_cmask (buf0[3])); - buf1[0] ^= (generate_cmask (buf1[0])); - buf1[1] ^= (generate_cmask (buf1[1])); - buf1[2] ^= (generate_cmask (buf1[2])); - buf1[3] ^= (generate_cmask (buf1[3])); - - return in_len; -} - -static u32 rule_op_mangle_toggle_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 tmp = 0x20 << ((p0 & 3) * 8); - - switch (p0 / 4) - { - case 0: buf0[0] ^= (tmp & generate_cmask (buf0[0])); break; - case 1: buf0[1] ^= (tmp & generate_cmask (buf0[1])); break; - case 2: buf0[2] ^= (tmp & generate_cmask (buf0[2])); break; - case 3: buf0[3] ^= (tmp & generate_cmask (buf0[3])); break; - case 4: buf1[0] ^= (tmp & generate_cmask (buf1[0])); break; - case 5: buf1[1] ^= (tmp & generate_cmask (buf1[1])); break; - case 6: buf1[2] ^= (tmp & generate_cmask (buf1[2])); break; - case 7: buf1[3] ^= (tmp & generate_cmask (buf1[3])); break; - } - - return in_len; -} - -static u32 rule_op_mangle_reverse (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - reverse_block (buf0, buf1, buf0, buf1, in_len); - - 
return in_len; -} - -static u32 rule_op_mangle_dupeword (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ((in_len + in_len) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = buf0[0]; - tib40[1] = buf0[1]; - tib40[2] = buf0[2]; - tib40[3] = buf0[3]; - tib41[0] = buf1[0]; - tib41[1] = buf1[1]; - tib41[2] = buf1[2]; - tib41[3] = buf1[3]; - - append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41); - - out_len += in_len; - - return out_len; -} - -static u32 rule_op_mangle_dupeword_times (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (((in_len * p0) + in_len) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = buf0[0]; - tib40[1] = buf0[1]; - tib40[2] = buf0[2]; - tib40[3] = buf0[3]; - tib41[0] = buf1[0]; - tib41[1] = buf1[1]; - tib41[2] = buf1[2]; - tib41[3] = buf1[3]; - - for (u32 i = 0; i < p0; i++) - { - append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41); - - out_len += in_len; - } - - return out_len; -} - -static u32 rule_op_mangle_reflect (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ((in_len + in_len) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - reverse_block (buf0, buf1, tib40, tib41, out_len); - - append_block8 (out_len, buf0, buf1, buf0, buf1, tib40, tib41); - - out_len += in_len; - - return out_len; -} - -static u32 rule_op_mangle_append (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ((in_len + 1) >= 32) return (in_len); - - u32 out_len = in_len; - - append_block1 (out_len, buf0, buf1, p0); - - out_len++; - - return out_len; -} - -static u32 rule_op_mangle_prepend (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ((in_len + 1) >= 32) return (in_len); - - u32 out_len = in_len; - - rshift_block (buf0, buf1, buf0, buf1); - - 
buf0[0] = buf0[0] | p0; - - out_len++; - - return out_len; -} - -static u32 rule_op_mangle_rotate_left (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (in_len == 0) return (in_len); - - const u32 in_len1 = in_len - 1; - - const u32 sh = (in_len1 & 3) * 8; - - const u32x tmp = (buf0[0] & 0xff) << sh; - - lshift_block (buf0, buf1, buf0, buf1); - - switch (in_len1 / 4) - { - case 0: buf0[0] |= tmp; break; - case 1: buf0[1] |= tmp; break; - case 2: buf0[2] |= tmp; break; - case 3: buf0[3] |= tmp; break; - case 4: buf1[0] |= tmp; break; - case 5: buf1[1] |= tmp; break; - case 6: buf1[2] |= tmp; break; - case 7: buf1[3] |= tmp; break; - } - - return in_len; -} - -static u32 rule_op_mangle_rotate_right (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (in_len == 0) return (in_len); - - const u32 in_len1 = in_len - 1; - - const u32 sh = (in_len1 & 3) * 8; - - u32x tmp = 0; - - switch (in_len1 / 4) - { - case 0: tmp = (buf0[0] >> sh) & 0xff; break; - case 1: tmp = (buf0[1] >> sh) & 0xff; break; - case 2: tmp = (buf0[2] >> sh) & 0xff; break; - case 3: tmp = (buf0[3] >> sh) & 0xff; break; - case 4: tmp = (buf1[0] >> sh) & 0xff; break; - case 5: tmp = (buf1[1] >> sh) & 0xff; break; - case 6: tmp = (buf1[2] >> sh) & 0xff; break; - case 7: tmp = (buf1[3] >> sh) & 0xff; break; - } - - rshift_block (buf0, buf1, buf0, buf1); - - buf0[0] |= tmp; - - truncate_right (buf0, buf1, in_len); - - return in_len; -} - -static u32 rule_op_mangle_delete_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (in_len == 0) return (in_len); - - const u32 in_len1 = in_len - 1; - - lshift_block (buf0, buf1, buf0, buf1); - - return in_len1; -} - -static u32 rule_op_mangle_delete_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (in_len == 0) return (in_len); - - const u32 in_len1 = in_len - 1; - - const u32 tmp = (1 << ((in_len1 & 3) * 8)) - 1; - - switch 
(in_len1 / 4) - { - case 0: buf0[0] &= tmp; break; - case 1: buf0[1] &= tmp; break; - case 2: buf0[2] &= tmp; break; - case 3: buf0[3] &= tmp; break; - case 4: buf1[0] &= tmp; break; - case 5: buf1[1] &= tmp; break; - case 6: buf1[2] &= tmp; break; - case 7: buf1[3] &= tmp; break; - } - - return in_len1; -} - -static u32 rule_op_mangle_delete_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - lshift_block (buf0, buf1, tib40, tib41); - - const u32 ml = (1 << ((p0 & 3) * 8)) - 1; - const u32 mr = ~ml; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) - | (tib40[0] & mr); - buf0[1] = tib40[1]; - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 1: buf0[1] = (buf0[1] & ml) - | (tib40[1] & mr); - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 2: buf0[2] = (buf0[2] & ml) - | (tib40[2] & mr); - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 3: buf0[3] = (buf0[3] & ml) - | (tib40[3] & mr); - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 4: buf1[0] = (buf1[0] & ml) - | (tib41[0] & mr); - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 5: buf1[1] = (buf1[1] & ml) - | (tib41[1] & mr); - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 6: buf1[2] = (buf1[2] & ml) - | (tib41[2] & mr); - buf1[3] = tib41[3]; - break; - case 7: buf1[3] = (buf1[3] & ml) - | (tib41[3] & mr); - break; - } - - out_len--; - - return out_len; -} - -static u32 rule_op_mangle_extract (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - if 
((p0 + p1) > in_len) return (in_len); - - u32 out_len = p1; - - lshift_block_N (buf0, buf1, buf0, buf1, p0); - - truncate_right (buf0, buf1, out_len); - - return out_len; -} - -static u32 rule_op_mangle_omit (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - if ((p0 + p1) > in_len) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = 0; - tib40[1] = 0; - tib40[2] = 0; - tib40[3] = 0; - tib41[0] = 0; - tib41[1] = 0; - tib41[2] = 0; - tib41[3] = 0; - - lshift_block_N (buf0, buf1, tib40, tib41, p1); - - const u32 ml = (1 << ((p0 & 3) * 8)) - 1; - const u32 mr = ~ml; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) - | (tib40[0] & mr); - buf0[1] = tib40[1]; - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 1: buf0[1] = (buf0[1] & ml) - | (tib40[1] & mr); - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 2: buf0[2] = (buf0[2] & ml) - | (tib40[2] & mr); - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 3: buf0[3] = (buf0[3] & ml) - | (tib40[3] & mr); - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 4: buf1[0] = (buf1[0] & ml) - | (tib41[0] & mr); - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 5: buf1[1] = (buf1[1] & ml) - | (tib41[1] & mr); - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 6: buf1[2] = (buf1[2] & ml) - | (tib41[2] & mr); - buf1[3] = tib41[3]; - break; - case 7: buf1[3] = (buf1[3] & ml) - | (tib41[3] & mr); - break; - } - - out_len -= p1; - - return out_len; -} - -static u32 rule_op_mangle_insert (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ 
- if (p0 > in_len) return (in_len); - - if ((in_len + 1) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - rshift_block (buf0, buf1, tib40, tib41); - - const u32 p1n = p1 << ((p0 & 3) * 8); - - const u32 ml = (1 << ((p0 & 3) * 8)) - 1; - - const u32 mr = 0xffffff00 << ((p0 & 3) * 8); - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | p1n | (tib40[0] & mr); - buf0[1] = tib40[1]; - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 1: buf0[1] = (buf0[1] & ml) | p1n | (tib40[1] & mr); - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 2: buf0[2] = (buf0[2] & ml) | p1n | (tib40[2] & mr); - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 3: buf0[3] = (buf0[3] & ml) | p1n | (tib40[3] & mr); - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 4: buf1[0] = (buf1[0] & ml) | p1n | (tib41[0] & mr); - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 5: buf1[1] = (buf1[1] & ml) | p1n | (tib41[1] & mr); - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - break; - case 6: buf1[2] = (buf1[2] & ml) | p1n | (tib41[2] & mr); - buf1[3] = tib41[3]; - break; - case 7: buf1[3] = (buf1[3] & ml) | p1n | (tib41[3] & mr); - break; - } - - out_len++; - - return out_len; -} - -static u32 rule_op_mangle_overstrike (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 p1n = p1 << ((p0 & 3) * 8); - - const u32 m = ~(0xff << ((p0 & 3) * 8)); - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & m) | p1n; break; - case 1: buf0[1] = (buf0[1] & m) | p1n; break; - case 2: buf0[2] = (buf0[2] & m) | p1n; break; - case 3: buf0[3] = (buf0[3] & 
m) | p1n; break; - case 4: buf1[0] = (buf1[0] & m) | p1n; break; - case 5: buf1[1] = (buf1[1] & m) | p1n; break; - case 6: buf1[2] = (buf1[2] & m) | p1n; break; - case 7: buf1[3] = (buf1[3] & m) | p1n; break; - } - - return in_len; -} - -static u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - truncate_right (buf0, buf1, p0); - - return p0; -} - -static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - #ifdef VECT_SIZE1 - - const uchar4 tmp0 = (uchar4) (p0); - const uchar4 tmp1 = (uchar4) (p1); - - uchar4 tmp; - - tmp = as_uchar4 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint (tmp); - tmp = as_uchar4 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint (tmp); - tmp = as_uchar4 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint (tmp); - tmp = as_uchar4 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint (tmp); - tmp = as_uchar4 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint (tmp); - tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp); - tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp); - tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp); - - #endif - - #ifdef VECT_SIZE2 - - const uchar8 tmp0 = (uchar8) (p0); - const uchar8 tmp1 = (uchar8) (p1); - - uchar8 tmp; - - tmp = as_uchar8 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint2 (tmp); - tmp = as_uchar8 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint2 (tmp); - tmp = as_uchar8 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint2 (tmp); - tmp = as_uchar8 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint2 (tmp); - tmp = as_uchar8 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = 
as_uint2 (tmp); - tmp = as_uchar8 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint2 (tmp); - tmp = as_uchar8 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint2 (tmp); - tmp = as_uchar8 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint2 (tmp); - - #endif - - #ifdef VECT_SIZE4 - - const uchar16 tmp0 = (uchar16) (p0); - const uchar16 tmp1 = (uchar16) (p1); - - uchar16 tmp; - - tmp = as_uchar16 (buf0[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[0] = as_uint4 (tmp); - tmp = as_uchar16 (buf0[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[1] = as_uint4 (tmp); - tmp = as_uchar16 (buf0[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[2] = as_uint4 (tmp); - tmp = as_uchar16 (buf0[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf0[3] = as_uint4 (tmp); - tmp = as_uchar16 (buf1[0]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[0] = as_uint4 (tmp); - tmp = as_uchar16 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint4 (tmp); - tmp = as_uchar16 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint4 (tmp); - tmp = as_uchar16 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint4 (tmp); - - #endif - - return in_len; -} - -static u32 rule_op_mangle_purgechar (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - // TODO - return in_len; -} - -static u32 rule_op_mangle_togglecase_rec (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - // TODO - return in_len; -} - -static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ( in_len == 0) return (in_len); - if ((in_len + p0) >= 32) return (in_len); - - u32 out_len = in_len; - - const u32x tmp = buf0[0] & 0xFF; - - rshift_block_N (buf0, buf1, buf0, buf1, p0); - - switch (p0) - { - case 1: buf0[0] |= tmp << 0; - break; - case 2: buf0[0] |= tmp << 0 | tmp << 8; - break; - case 3: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 
16; - break; - case 4: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 5: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0; - break; - case 6: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8; - break; - case 7: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 8: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 9: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0; - break; - case 10: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8; - break; - case 11: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 12: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 13: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0; - break; - case 14: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8; - break; - case 15: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 16: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - 
buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 17: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0; - break; - case 18: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8; - break; - case 19: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 20: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 21: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0; - break; - case 22: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp 
<< 8; - break; - case 23: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 24: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 25: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0; - break; - case 26: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8; - break; - case 27: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - case 28: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 
24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - break; - case 29: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[3] |= tmp << 0; - break; - case 30: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[3] |= tmp << 0 | tmp << 8; - break; - case 31: buf0[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf0[3] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[0] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[1] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[2] |= tmp << 0 | tmp << 8 | tmp << 16 | tmp << 24; - buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16; - break; - } - - out_len += p0; - - return out_len; -} - -static u32 rule_op_mangle_dupechar_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ( in_len == 0) return (in_len); - if ((in_len + p0) >= 32) return (in_len); - - const u32 
in_len1 = in_len - 1; - - const u32 sh = (in_len1 & 3) * 8; - - u32x tmp = 0; - - switch (in_len1 / 4) - { - case 0: tmp = (buf0[0] >> sh) & 0xff; break; - case 1: tmp = (buf0[1] >> sh) & 0xff; break; - case 2: tmp = (buf0[2] >> sh) & 0xff; break; - case 3: tmp = (buf0[3] >> sh) & 0xff; break; - case 4: tmp = (buf1[0] >> sh) & 0xff; break; - case 5: tmp = (buf1[1] >> sh) & 0xff; break; - case 6: tmp = (buf1[2] >> sh) & 0xff; break; - case 7: tmp = (buf1[3] >> sh) & 0xff; break; - } - - u32 out_len = in_len; - - for (u32 i = 0; i < p0; i++) - { - append_block1 (out_len, buf0, buf1, tmp); - - out_len++; - } - - return out_len; -} - -static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ( in_len == 0) return (in_len); - if ((in_len + in_len) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8); - tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8); - tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8); - tib40[3] = ((buf0[1] & 0x00FF0000) >> 16) | ((buf0[1] & 0xFF000000) >> 8); - tib41[0] = ((buf0[2] & 0x000000FF) << 0) | ((buf0[2] & 0x0000FF00) << 8); - tib41[1] = ((buf0[2] & 0x00FF0000) >> 16) | ((buf0[2] & 0xFF000000) >> 8); - tib41[2] = ((buf0[3] & 0x000000FF) << 0) | ((buf0[3] & 0x0000FF00) << 8); - tib41[3] = ((buf0[3] & 0x00FF0000) >> 16) | ((buf0[3] & 0xFF000000) >> 8); - - buf0[0] = tib40[0] | (tib40[0] << 8); - buf0[1] = tib40[1] | (tib40[1] << 8); - buf0[2] = tib40[2] | (tib40[2] << 8); - buf0[3] = tib40[3] | (tib40[3] << 8); - buf1[0] = tib41[0] | (tib41[0] << 8); - buf1[1] = tib41[1] | (tib41[1] << 8); - buf1[2] = tib41[2] | (tib41[2] << 8); - buf1[3] = tib41[3] | (tib41[3] << 8); - - out_len = out_len + out_len; - - return out_len; -} - -static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const 
u32 in_len) -{ - if (in_len < 2) return (in_len); - - buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); - - return in_len; -} - -static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (in_len < 2) return (in_len); - - switch (in_len) - { - case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); - break; - case 3: buf0[0] = (buf0[0] & 0x000000FF) | ((buf0[0] << 8) & 0x00FF0000) | ((buf0[0] >> 8) & 0x0000FF00); - break; - case 4: buf0[0] = (buf0[0] & 0x0000FFFF) | ((buf0[0] << 8) & 0xFF000000) | ((buf0[0] >> 8) & 0x00FF0000); - break; - case 5: buf0[1] = (buf0[0] & 0xFF000000) | buf0[1]; - buf0[0] = (buf0[0] & 0x00FFFFFF) | (buf0[1] << 24); - buf0[1] = (buf0[1] >> 24); - break; - case 6: buf0[1] = ((buf0[1] << 8) & 0x0000FF00) | ((buf0[1] >> 8) & 0x000000FF); - break; - case 7: buf0[1] = (buf0[1] & 0x000000FF) | ((buf0[1] << 8) & 0x00FF0000) | ((buf0[1] >> 8) & 0x0000FF00); - break; - case 8: buf0[1] = (buf0[1] & 0x0000FFFF) | ((buf0[1] << 8) & 0xFF000000) | ((buf0[1] >> 8) & 0x00FF0000); - break; - case 9: buf0[2] = (buf0[1] & 0xFF000000) | buf0[2]; - buf0[1] = (buf0[1] & 0x00FFFFFF) | (buf0[2] << 24); - buf0[2] = (buf0[2] >> 24); - break; - case 10: buf0[2] = ((buf0[2] << 8) & 0x0000FF00) | ((buf0[2] >> 8) & 0x000000FF); - break; - case 11: buf0[2] = (buf0[2] & 0x000000FF) | ((buf0[2] << 8) & 0x00FF0000) | ((buf0[2] >> 8) & 0x0000FF00); - break; - case 12: buf0[2] = (buf0[2] & 0x0000FFFF) | ((buf0[2] << 8) & 0xFF000000) | ((buf0[2] >> 8) & 0x00FF0000); - break; - case 13: buf0[3] = (buf0[2] & 0xFF000000) | buf0[3]; - buf0[2] = (buf0[2] & 0x00FFFFFF) | (buf0[3] << 24); - buf0[3] = (buf0[3] >> 24); - break; - case 14: buf0[3] = ((buf0[3] << 8) & 0x0000FF00) | ((buf0[3] >> 8) & 0x000000FF); - break; - case 15: buf0[3] = (buf0[3] & 0x000000FF) | ((buf0[3] << 8) & 0x00FF0000) | ((buf0[3] >> 8) & 0x0000FF00); - break; - case 16: 
buf0[3] = (buf0[3] & 0x0000FFFF) | ((buf0[3] << 8) & 0xFF000000) | ((buf0[3] >> 8) & 0x00FF0000); - break; - case 17: buf1[0] = (buf0[3] & 0xFF000000) | buf1[0]; - buf0[3] = (buf0[3] & 0x00FFFFFF) | (buf1[0] << 24); - buf1[0] = (buf1[0] >> 24); - break; - case 18: buf1[0] = ((buf1[0] << 8) & 0x0000FF00) | ((buf1[0] >> 8) & 0x000000FF); - break; - case 19: buf1[0] = (buf1[0] & 0x000000FF) | ((buf1[0] << 8) & 0x00FF0000) | ((buf1[0] >> 8) & 0x0000FF00); - break; - case 20: buf1[0] = (buf1[0] & 0x0000FFFF) | ((buf1[0] << 8) & 0xFF000000) | ((buf1[0] >> 8) & 0x00FF0000); - break; - case 21: buf1[1] = (buf1[0] & 0xFF000000) | buf1[1]; - buf1[0] = (buf1[0] & 0x00FFFFFF) | (buf1[1] << 24); - buf1[1] = (buf1[1] >> 24); - break; - case 22: buf1[1] = ((buf1[1] << 8) & 0x0000FF00) | ((buf1[1] >> 8) & 0x000000FF); - break; - case 23: buf1[1] = (buf1[1] & 0x000000FF) | ((buf1[1] << 8) & 0x00FF0000) | ((buf1[1] >> 8) & 0x0000FF00); - break; - case 24: buf1[1] = (buf1[1] & 0x0000FFFF) | ((buf1[1] << 8) & 0xFF000000) | ((buf1[1] >> 8) & 0x00FF0000); - break; - case 25: buf1[2] = (buf1[1] & 0xFF000000) | buf1[2]; - buf1[1] = (buf1[1] & 0x00FFFFFF) | (buf1[2] << 24); - buf1[2] = (buf1[2] >> 24); - break; - case 26: buf1[2] = ((buf1[2] << 8) & 0x0000FF00) | ((buf1[2] >> 8) & 0x000000FF); - break; - case 27: buf1[2] = (buf1[2] & 0x000000FF) | ((buf1[2] << 8) & 0x00FF0000) | ((buf1[2] >> 8) & 0x0000FF00); - break; - case 28: buf1[2] = (buf1[2] & 0x0000FFFF) | ((buf1[2] << 8) & 0xFF000000) | ((buf1[2] >> 8) & 0x00FF0000); - break; - case 29: buf1[3] = (buf1[2] & 0xFF000000) | buf1[3]; - buf1[2] = (buf1[2] & 0x00FFFFFF) | (buf1[3] << 24); - buf1[3] = (buf1[3] >> 24); - break; - case 30: buf1[3] = ((buf1[3] << 8) & 0x0000FF00) | ((buf1[3] >> 8) & 0x000000FF); - break; - case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00); - break; - } - - return in_len; -} - -static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32x 
buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - if (p1 >= in_len) return (in_len); - - u32x tmp0 = 0; - u32x tmp1 = 0; - - switch (p0) - { - case 0: tmp0 = (buf0[0] >> 0) & 0xFF; - break; - case 1: tmp0 = (buf0[0] >> 8) & 0xFF; - break; - case 2: tmp0 = (buf0[0] >> 16) & 0xFF; - break; - case 3: tmp0 = (buf0[0] >> 24) & 0xFF; - break; - case 4: tmp0 = (buf0[1] >> 0) & 0xFF; - break; - case 5: tmp0 = (buf0[1] >> 8) & 0xFF; - break; - case 6: tmp0 = (buf0[1] >> 16) & 0xFF; - break; - case 7: tmp0 = (buf0[1] >> 24) & 0xFF; - break; - case 8: tmp0 = (buf0[2] >> 0) & 0xFF; - break; - case 9: tmp0 = (buf0[2] >> 8) & 0xFF; - break; - case 10: tmp0 = (buf0[2] >> 16) & 0xFF; - break; - case 11: tmp0 = (buf0[2] >> 24) & 0xFF; - break; - case 12: tmp0 = (buf0[3] >> 0) & 0xFF; - break; - case 13: tmp0 = (buf0[3] >> 8) & 0xFF; - break; - case 14: tmp0 = (buf0[3] >> 16) & 0xFF; - break; - case 15: tmp0 = (buf0[3] >> 24) & 0xFF; - break; - case 16: tmp0 = (buf1[0] >> 0) & 0xFF; - break; - case 17: tmp0 = (buf1[0] >> 8) & 0xFF; - break; - case 18: tmp0 = (buf1[0] >> 16) & 0xFF; - break; - case 19: tmp0 = (buf1[0] >> 24) & 0xFF; - break; - case 20: tmp0 = (buf1[1] >> 0) & 0xFF; - break; - case 21: tmp0 = (buf1[1] >> 8) & 0xFF; - break; - case 22: tmp0 = (buf1[1] >> 16) & 0xFF; - break; - case 23: tmp0 = (buf1[1] >> 24) & 0xFF; - break; - case 24: tmp0 = (buf1[2] >> 0) & 0xFF; - break; - case 25: tmp0 = (buf1[2] >> 8) & 0xFF; - break; - case 26: tmp0 = (buf1[2] >> 16) & 0xFF; - break; - case 27: tmp0 = (buf1[2] >> 24) & 0xFF; - break; - case 28: tmp0 = (buf1[3] >> 0) & 0xFF; - break; - case 29: tmp0 = (buf1[3] >> 8) & 0xFF; - break; - case 30: tmp0 = (buf1[3] >> 16) & 0xFF; - break; - case 31: tmp0 = (buf1[3] >> 24) & 0xFF; - break; - } - - switch (p1) - { - case 0: tmp1 = (buf0[0] >> 0) & 0xff; - buf0[0] = (buf0[0] & 0xffffff00) | tmp0 << 0; - break; - case 1: tmp1 = (buf0[0] >> 8) & 0xff; - buf0[0] = (buf0[0] & 0xffff00ff) | tmp0 << 8; - break; - 
case 2: tmp1 = (buf0[0] >> 16) & 0xff; - buf0[0] = (buf0[0] & 0xff00ffff) | tmp0 << 16; - break; - case 3: tmp1 = (buf0[0] >> 24) & 0xff; - buf0[0] = (buf0[0] & 0x00ffffff) | tmp0 << 24; - break; - case 4: tmp1 = (buf0[1] >> 0) & 0xff; - buf0[1] = (buf0[1] & 0xffffff00) | tmp0 << 0; - break; - case 5: tmp1 = (buf0[1] >> 8) & 0xff; - buf0[1] = (buf0[1] & 0xffff00ff) | tmp0 << 8; - break; - case 6: tmp1 = (buf0[1] >> 16) & 0xff; - buf0[1] = (buf0[1] & 0xff00ffff) | tmp0 << 16; - break; - case 7: tmp1 = (buf0[1] >> 24) & 0xff; - buf0[1] = (buf0[1] & 0x00ffffff) | tmp0 << 24; - break; - case 8: tmp1 = (buf0[2] >> 0) & 0xff; - buf0[2] = (buf0[2] & 0xffffff00) | tmp0 << 0; - break; - case 9: tmp1 = (buf0[2] >> 8) & 0xff; - buf0[2] = (buf0[2] & 0xffff00ff) | tmp0 << 8; - break; - case 10: tmp1 = (buf0[2] >> 16) & 0xff; - buf0[2] = (buf0[2] & 0xff00ffff) | tmp0 << 16; - break; - case 11: tmp1 = (buf0[2] >> 24) & 0xff; - buf0[2] = (buf0[2] & 0x00ffffff) | tmp0 << 24; - break; - case 12: tmp1 = (buf0[3] >> 0) & 0xff; - buf0[3] = (buf0[3] & 0xffffff00) | tmp0 << 0; - break; - case 13: tmp1 = (buf0[3] >> 8) & 0xff; - buf0[3] = (buf0[3] & 0xffff00ff) | tmp0 << 8; - break; - case 14: tmp1 = (buf0[3] >> 16) & 0xff; - buf0[3] = (buf0[3] & 0xff00ffff) | tmp0 << 16; - break; - case 15: tmp1 = (buf0[3] >> 24) & 0xff; - buf0[3] = (buf0[3] & 0x00ffffff) | tmp0 << 24; - break; - case 16: tmp1 = (buf1[0] >> 0) & 0xff; - buf1[0] = (buf1[0] & 0xffffff00) | tmp0 << 0; - break; - case 17: tmp1 = (buf1[0] >> 8) & 0xff; - buf1[0] = (buf1[0] & 0xffff00ff) | tmp0 << 8; - break; - case 18: tmp1 = (buf1[0] >> 16) & 0xff; - buf1[0] = (buf1[0] & 0xff00ffff) | tmp0 << 16; - break; - case 19: tmp1 = (buf1[0] >> 24) & 0xff; - buf1[0] = (buf1[0] & 0x00ffffff) | tmp0 << 24; - break; - case 20: tmp1 = (buf1[1] >> 0) & 0xff; - buf1[1] = (buf1[1] & 0xffffff00) | tmp0 << 0; - break; - case 21: tmp1 = (buf1[1] >> 8) & 0xff; - buf1[1] = (buf1[1] & 0xffff00ff) | tmp0 << 8; - break; - case 22: tmp1 = (buf1[1] >> 
16) & 0xff; - buf1[1] = (buf1[1] & 0xff00ffff) | tmp0 << 16; - break; - case 23: tmp1 = (buf1[1] >> 24) & 0xff; - buf1[1] = (buf1[1] & 0x00ffffff) | tmp0 << 24; - break; - case 24: tmp1 = (buf1[2] >> 0) & 0xff; - buf1[2] = (buf1[2] & 0xffffff00) | tmp0 << 0; - break; - case 25: tmp1 = (buf1[2] >> 8) & 0xff; - buf1[2] = (buf1[2] & 0xffff00ff) | tmp0 << 8; - break; - case 26: tmp1 = (buf1[2] >> 16) & 0xff; - buf1[2] = (buf1[2] & 0xff00ffff) | tmp0 << 16; - break; - case 27: tmp1 = (buf1[2] >> 24) & 0xff; - buf1[2] = (buf1[2] & 0x00ffffff) | tmp0 << 24; - break; - case 28: tmp1 = (buf1[3] >> 0) & 0xff; - buf1[3] = (buf1[3] & 0xffffff00) | tmp0 << 0; - break; - case 29: tmp1 = (buf1[3] >> 8) & 0xff; - buf1[3] = (buf1[3] & 0xffff00ff) | tmp0 << 8; - break; - case 30: tmp1 = (buf1[3] >> 16) & 0xff; - buf1[3] = (buf1[3] & 0xff00ffff) | tmp0 << 16; - break; - case 31: tmp1 = (buf1[3] >> 24) & 0xff; - buf1[3] = (buf1[3] & 0x00ffffff) | tmp0 << 24; - break; - } - - switch (p0) - { - case 0: buf0[0] = (buf0[0] & 0xffffff00) | tmp1 << 0; - break; - case 1: buf0[0] = (buf0[0] & 0xffff00ff) | tmp1 << 8; - break; - case 2: buf0[0] = (buf0[0] & 0xff00ffff) | tmp1 << 16; - break; - case 3: buf0[0] = (buf0[0] & 0x00ffffff) | tmp1 << 24; - break; - case 4: buf0[1] = (buf0[1] & 0xffffff00) | tmp1 << 0; - break; - case 5: buf0[1] = (buf0[1] & 0xffff00ff) | tmp1 << 8; - break; - case 6: buf0[1] = (buf0[1] & 0xff00ffff) | tmp1 << 16; - break; - case 7: buf0[1] = (buf0[1] & 0x00ffffff) | tmp1 << 24; - break; - case 8: buf0[2] = (buf0[2] & 0xffffff00) | tmp1 << 0; - break; - case 9: buf0[2] = (buf0[2] & 0xffff00ff) | tmp1 << 8; - break; - case 10: buf0[2] = (buf0[2] & 0xff00ffff) | tmp1 << 16; - break; - case 11: buf0[2] = (buf0[2] & 0x00ffffff) | tmp1 << 24; - break; - case 12: buf0[3] = (buf0[3] & 0xffffff00) | tmp1 << 0; - break; - case 13: buf0[3] = (buf0[3] & 0xffff00ff) | tmp1 << 8; - break; - case 14: buf0[3] = (buf0[3] & 0xff00ffff) | tmp1 << 16; - break; - case 15: buf0[3] = 
(buf0[3] & 0x00ffffff) | tmp1 << 24; - break; - case 16: buf1[0] = (buf1[0] & 0xffffff00) | tmp1 << 0; - break; - case 17: buf1[0] = (buf1[0] & 0xffff00ff) | tmp1 << 8; - break; - case 18: buf1[0] = (buf1[0] & 0xff00ffff) | tmp1 << 16; - break; - case 19: buf1[0] = (buf1[0] & 0x00ffffff) | tmp1 << 24; - break; - case 20: buf1[1] = (buf1[1] & 0xffffff00) | tmp1 << 0; - break; - case 21: buf1[1] = (buf1[1] & 0xffff00ff) | tmp1 << 8; - break; - case 22: buf1[1] = (buf1[1] & 0xff00ffff) | tmp1 << 16; - break; - case 23: buf1[1] = (buf1[1] & 0x00ffffff) | tmp1 << 24; - break; - case 24: buf1[2] = (buf1[2] & 0xffffff00) | tmp1 << 0; - break; - case 25: buf1[2] = (buf1[2] & 0xffff00ff) | tmp1 << 8; - break; - case 26: buf1[2] = (buf1[2] & 0xff00ffff) | tmp1 << 16; - break; - case 27: buf1[2] = (buf1[2] & 0x00ffffff) | tmp1 << 24; - break; - case 28: buf1[3] = (buf1[3] & 0xffffff00) | tmp1 << 0; - break; - case 29: buf1[3] = (buf1[3] & 0xffff00ff) | tmp1 << 8; - break; - case 30: buf1[3] = (buf1[3] & 0xff00ffff) | tmp1 << 16; - break; - case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24; - break; - } - - return in_len; -} - -static u32 rule_op_mangle_chr_shiftl (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) << 1) & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) << 1) & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) << 1) & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) << 1) & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) << 1) & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) << 1) & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) << 1) & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) << 1) & mr); break; - } - - return in_len; -} - 
-static u32 rule_op_mangle_chr_shiftr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) >> 1) & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) >> 1) & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) >> 1) & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) >> 1) & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) >> 1) & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) >> 1) & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) >> 1) & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) >> 1) & mr); break; - } - - return in_len; -} - -static u32 rule_op_mangle_chr_incr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - const u32 n = 0x01010101 & mr; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) + n) & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) + n) & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) + n) & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) + n) & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) + n) & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) + n) & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) + n) & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) + n) & mr); break; - } - - return in_len; -} - -static u32 rule_op_mangle_chr_decr (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 >= in_len) return (in_len); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - const u32 n = 0x01010101 & mr; 
- - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (((buf0[0] & mr) - n) & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (((buf0[1] & mr) - n) & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | (((buf0[2] & mr) - n) & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (((buf0[3] & mr) - n) & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (((buf1[0] & mr) - n) & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (((buf1[1] & mr) - n) & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (((buf1[2] & mr) - n) & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (((buf1[3] & mr) - n) & mr); break; - } - - return in_len; -} - -static u32 rule_op_mangle_replace_np1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if ((p0 + 1) >= in_len) return (in_len); - - u32x tib40[4]; - u32x tib41[4]; - - lshift_block (buf0, buf1, tib40, tib41); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | (tib40[2] & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break; - } - - return in_len; -} - -static u32 rule_op_mangle_replace_nm1 (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 == 0) return (in_len); - - if (p0 >= in_len) return (in_len); - - u32x tib40[4]; - u32x tib41[4]; - - rshift_block (buf0, buf1, tib40, tib41); - - const u32 mr = 0xff << ((p0 & 3) * 8); - const u32 ml = ~mr; - - switch (p0 / 4) - { - case 0: buf0[0] = (buf0[0] & ml) | (tib40[0] & mr); break; - case 1: buf0[1] = (buf0[1] & ml) | (tib40[1] & mr); break; - case 2: buf0[2] = (buf0[2] & ml) | 
(tib40[2] & mr); break; - case 3: buf0[3] = (buf0[3] & ml) | (tib40[3] & mr); break; - case 4: buf1[0] = (buf1[0] & ml) | (tib41[0] & mr); break; - case 5: buf1[1] = (buf1[1] & ml) | (tib41[1] & mr); break; - case 6: buf1[2] = (buf1[2] & ml) | (tib41[2] & mr); break; - case 7: buf1[3] = (buf1[3] & ml) | (tib41[3] & mr); break; - } - - return in_len; -} - -static u32 rule_op_mangle_dupeblock_first (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 > in_len) return (in_len); - - if ((in_len + p0) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - tib40[0] = buf0[0]; - tib40[1] = buf0[1]; - tib40[2] = buf0[2]; - tib40[3] = buf0[3]; - tib41[0] = buf1[0]; - tib41[1] = buf1[1]; - tib41[2] = buf1[2]; - tib41[3] = buf1[3]; - - truncate_right (tib40, tib41, p0); - - rshift_block_N (buf0, buf1, buf0, buf1, p0); - - buf0[0] |= tib40[0]; - buf0[1] |= tib40[1]; - buf0[2] |= tib40[2]; - buf0[3] |= tib40[3]; - buf1[0] |= tib41[0]; - buf1[1] |= tib41[1]; - buf1[2] |= tib41[2]; - buf1[3] |= tib41[3]; - - out_len += p0; - - return out_len; -} - -static u32 rule_op_mangle_dupeblock_last (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - if (p0 > in_len) return (in_len); - - if ((in_len + p0) >= 32) return (in_len); - - u32 out_len = in_len; - - u32x tib40[4]; - u32x tib41[4]; - - rshift_block_N (buf0, buf1, tib40, tib41, p0); - - truncate_left (tib40, tib41, out_len); - - buf0[0] |= tib40[0]; - buf0[1] |= tib40[1]; - buf0[2] |= tib40[2]; - buf0[3] |= tib40[3]; - buf1[0] |= tib41[0]; - buf1[1] |= tib41[1]; - buf1[2] |= tib41[2]; - buf1[3] |= tib41[3]; - - out_len += p0; - - return out_len; -} - -static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - buf0[0] |= (generate_cmask (buf0[0])); - buf0[1] |= (generate_cmask (buf0[1])); - buf0[2] |= (generate_cmask (buf0[2])); - buf0[3] |= (generate_cmask (buf0[3])); - buf1[0] |= 
(generate_cmask (buf1[0])); - buf1[1] |= (generate_cmask (buf1[1])); - buf1[2] |= (generate_cmask (buf1[2])); - buf1[3] |= (generate_cmask (buf1[3])); - - #ifdef VECT_SIZE1 - - u32x tib40[4]; - u32x tib41[4]; - - const uchar4 tmp0 = (uchar4) (' '); - const uchar4 tmp1 = (uchar4) (0x00); - const uchar4 tmp2 = (uchar4) (0xff); - - uchar4 tmp; - - tmp = as_uchar4 (buf0[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[0] = as_uint (tmp); - tmp = as_uchar4 (buf0[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[1] = as_uint (tmp); - tmp = as_uchar4 (buf0[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[2] = as_uint (tmp); - tmp = as_uchar4 (buf0[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib40[3] = as_uint (tmp); - tmp = as_uchar4 (buf1[0]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[0] = as_uint (tmp); - tmp = as_uchar4 (buf1[1]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[1] = as_uint (tmp); - tmp = as_uchar4 (buf1[2]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[2] = as_uint (tmp); - tmp = as_uchar4 (buf1[3]); tmp = select (tmp1, tmp2, tmp == tmp0); tib41[3] = as_uint (tmp); - - rshift_block (tib40, tib41, tib40, tib41); tib40[0] |= 0xff; - - buf0[0] &= ~(generate_cmask (buf0[0]) & tib40[0]); - buf0[1] &= ~(generate_cmask (buf0[1]) & tib40[1]); - buf0[2] &= ~(generate_cmask (buf0[2]) & tib40[2]); - buf0[3] &= ~(generate_cmask (buf0[3]) & tib40[3]); - buf1[0] &= ~(generate_cmask (buf1[0]) & tib41[0]); - buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]); - buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]); - buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]); - - #else - - buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0])); - - // The VLIW1 code above freezes VLIW4 and VLIW5 systems - - for (u32 i = 0; i < in_len; i++) - { - u32x tmp0; - u32x tmp1; - - switch (i) - { - case 0: tmp0 = (buf0[0] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break; - case 1: tmp0 = (buf0[0] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); 
break; - case 2: tmp0 = (buf0[0] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break; - case 3: tmp0 = (buf0[0] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break; - case 4: tmp0 = (buf0[1] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break; - case 5: tmp0 = (buf0[1] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break; - case 6: tmp0 = (buf0[1] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break; - case 7: tmp0 = (buf0[1] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break; - case 8: tmp0 = (buf0[2] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break; - case 9: tmp0 = (buf0[2] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break; - case 10: tmp0 = (buf0[2] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break; - case 11: tmp0 = (buf0[2] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break; - case 12: tmp0 = (buf0[3] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break; - case 13: tmp0 = (buf0[3] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break; - case 14: tmp0 = (buf0[3] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break; - case 15: tmp0 = (buf0[3] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break; - case 16: tmp0 = (buf1[0] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break; - case 17: tmp0 = (buf1[0] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break; - case 18: tmp0 = (buf1[0] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break; - case 19: tmp0 = (buf1[0] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break; - case 20: tmp0 = (buf1[1] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break; - case 21: tmp0 = (buf1[1] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break; - case 22: 
tmp0 = (buf1[1] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break; - case 23: tmp0 = (buf1[1] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break; - case 24: tmp0 = (buf1[2] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break; - case 25: tmp0 = (buf1[2] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break; - case 26: tmp0 = (buf1[2] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break; - case 27: tmp0 = (buf1[2] >> 24) & 0xFF; - tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break; - case 28: tmp0 = (buf1[3] >> 0) & 0xFF; - tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break; - case 29: tmp0 = (buf1[3] >> 8) & 0xFF; - tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break; - case 30: tmp0 = (buf1[3] >> 16) & 0xFF; - tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break; - } - - #ifdef VECT_SIZE2 - if (i < 3) - { - if (tmp0.s0 == ' ') buf0[0].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[0].s1 &= tmp1.s1; - } - else if (i < 7) - { - if (tmp0.s0 == ' ') buf0[1].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[1].s1 &= tmp1.s1; - } - else if (i < 11) - { - if (tmp0.s0 == ' ') buf0[2].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[2].s1 &= tmp1.s1; - } - else if (i < 15) - { - if (tmp0.s0 == ' ') buf0[3].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[3].s1 &= tmp1.s1; - } - else if (i < 19) - { - if (tmp0.s0 == ' ') buf1[0].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[0].s1 &= tmp1.s1; - } - else if (i < 23) - { - if (tmp0.s0 == ' ') buf1[1].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[1].s1 &= tmp1.s1; - } - else if (i < 27) - { - if (tmp0.s0 == ' ') buf1[2].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[2].s1 &= tmp1.s1; - } - else if (i < 31) - { - if (tmp0.s0 == ' ') buf1[3].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[3].s1 &= tmp1.s1; - } - #endif - - #ifdef VECT_SIZE4 - if (i < 3) - { - if (tmp0.s0 == ' ') buf0[0].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[0].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') 
buf0[0].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf0[0].s3 &= tmp1.s3; - } - else if (i < 7) - { - if (tmp0.s0 == ' ') buf0[1].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[1].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf0[1].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf0[1].s3 &= tmp1.s3; - } - else if (i < 11) - { - if (tmp0.s0 == ' ') buf0[2].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[2].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf0[2].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf0[2].s3 &= tmp1.s3; - } - else if (i < 15) - { - if (tmp0.s0 == ' ') buf0[3].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf0[3].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf0[3].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf0[3].s3 &= tmp1.s3; - } - else if (i < 19) - { - if (tmp0.s0 == ' ') buf1[0].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[0].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf1[0].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf1[0].s3 &= tmp1.s3; - } - else if (i < 23) - { - if (tmp0.s0 == ' ') buf1[1].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[1].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf1[1].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf1[1].s3 &= tmp1.s3; - } - else if (i < 27) - { - if (tmp0.s0 == ' ') buf1[2].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[2].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf1[2].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf1[2].s3 &= tmp1.s3; - } - else if (i < 31) - { - if (tmp0.s0 == ' ') buf1[3].s0 &= tmp1.s0; - if (tmp0.s1 == ' ') buf1[3].s1 &= tmp1.s1; - if (tmp0.s2 == ' ') buf1[3].s2 &= tmp1.s2; - if (tmp0.s3 == ' ') buf1[3].s3 &= tmp1.s3; - } - #endif - } - - #endif - - return in_len; -} - -u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32x buf0[4], u32x buf1[4], const u32 in_len) -{ - u32 out_len = in_len; - - switch (name) - { - case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_UREST: out_len = rule_op_mangle_urest (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_LREST_UFIRST: out_len = rule_op_mangle_lrest_ufirst (p0, 
p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_UREST_LFIRST: out_len = rule_op_mangle_urest_lfirst (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_TREST: out_len = rule_op_mangle_trest (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_TOGGLE_AT: out_len = rule_op_mangle_toggle_at (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_REVERSE: out_len = rule_op_mangle_reverse (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPEWORD: out_len = rule_op_mangle_dupeword (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPEWORD_TIMES: out_len = rule_op_mangle_dupeword_times (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_REFLECT: out_len = rule_op_mangle_reflect (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_APPEND: out_len = rule_op_mangle_append (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_PREPEND: out_len = rule_op_mangle_prepend (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_ROTATE_LEFT: out_len = rule_op_mangle_rotate_left (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_ROTATE_RIGHT: out_len = rule_op_mangle_rotate_right (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DELETE_FIRST: out_len = rule_op_mangle_delete_first (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DELETE_LAST: out_len = rule_op_mangle_delete_last (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DELETE_AT: out_len = rule_op_mangle_delete_at (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_EXTRACT: out_len = rule_op_mangle_extract (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_OMIT: out_len = rule_op_mangle_omit (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_INSERT: out_len = rule_op_mangle_insert (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_OVERSTRIKE: out_len = rule_op_mangle_overstrike (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_TRUNCATE_AT: out_len = 
rule_op_mangle_truncate_at (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_REPLACE: out_len = rule_op_mangle_replace (p0, p1, buf0, buf1, out_len); break; - //case RULE_OP_MANGLE_PURGECHAR: out_len = rule_op_mangle_purgechar (p0, p1, buf0, buf1, out_len); break; - //case RULE_OP_MANGLE_TOGGLECASE_REC: out_len = rule_op_mangle_togglecase_rec (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_FIRST: out_len = rule_op_mangle_dupechar_first (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_LAST: out_len = rule_op_mangle_dupechar_last (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPECHAR_ALL: out_len = rule_op_mangle_dupechar_all (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_SWITCH_FIRST: out_len = rule_op_mangle_switch_first (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_SWITCH_LAST: out_len = rule_op_mangle_switch_last (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_SWITCH_AT: out_len = rule_op_mangle_switch_at (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_CHR_SHIFTL: out_len = rule_op_mangle_chr_shiftl (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_CHR_SHIFTR: out_len = rule_op_mangle_chr_shiftr (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_CHR_INCR: out_len = rule_op_mangle_chr_incr (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_CHR_DECR: out_len = rule_op_mangle_chr_decr (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_REPLACE_NP1: out_len = rule_op_mangle_replace_np1 (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_REPLACE_NM1: out_len = rule_op_mangle_replace_nm1 (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPEBLOCK_FIRST: out_len = rule_op_mangle_dupeblock_first (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_DUPEBLOCK_LAST: out_len = rule_op_mangle_dupeblock_last (p0, p1, buf0, buf1, out_len); break; - case RULE_OP_MANGLE_TITLE: out_len = 
rule_op_mangle_title (p0, p1, buf0, buf1, out_len); break; - } - - return out_len; -} - -u32 apply_rules (__global u32 *cmds, u32x buf0[4], u32x buf1[4], const u32 len) -{ - u32 out_len = len; - - for (u32 i = 0; cmds[i] != 0; i++) - { - const u32 cmd = cmds[i]; - - const u32 name = (cmd >> 0) & 0xff; - const u32 p0 = (cmd >> 8) & 0xff; - const u32 p1 = (cmd >> 16) & 0xff; - - out_len = apply_rule (name, p0, p1, buf0, buf1, out_len); - } - - return out_len; -} diff --git a/docs/changes.txt b/docs/changes.txt index 2f99087..ca5a265 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,5 +1,10 @@ * changes v2.01 -> not-known-yet: +Type.: Feature +File.: Kernel +Desc.: For NVidia, dropped CUDA support and switched to OpenCL +Issue: 1 + Type.: Feature File.: Host Desc.: Implemented a new feature that allows to quit at next restore point update (and disable it) @@ -10,10 +15,6 @@ File.: Host Desc.: Fixed a bug in combination of --restore and a user immediately aborting the session after restart Trac.: 684 -Type.: Change -File.: Docs -Desc.: Updated docs/readme.txt -- Starting with v2.01 OpenCL 2.00 is required; we need Catalyst 15.7 or higher - * changes v2.00 -> v2.01: Type.: Bug diff --git a/docs/readme.txt b/docs/readme.txt index bf03f20..7866da0 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -2,7 +2,7 @@ oclHashcat v2.00 ================ NV users require ForceWare 346.59 or later -AMD users require Catalyst 15.7 or later +AMD users require Catalyst 14.9 or later ## ## Features diff --git a/include/constants.h b/include/constants.h index 0b39e52..6ede6ed 100644 --- a/include/constants.h +++ b/include/constants.h @@ -35,10 +35,10 @@ * SipHash Constants */ -#define SIPHASHM_0 0x736f6d6570736575ull -#define SIPHASHM_1 0x646f72616e646f6dull -#define SIPHASHM_2 0x6c7967656e657261ull -#define SIPHASHM_3 0x7465646279746573ull +#define SIPHASHM_0 0x736f6d6570736575 +#define SIPHASHM_1 0x646f72616e646f6d +#define SIPHASHM_2 0x6c7967656e657261 +#define SIPHASHM_3 
0x7465646279746573 #endif #if defined _BCRYPT_ || defined _PSAFE2_ @@ -277,95 +277,95 @@ * SHA384 Constants (64 bits) */ -#define SHA384M_A 0xcbbb9d5dc1059ed8ull -#define SHA384M_B 0x629a292a367cd507ull -#define SHA384M_C 0x9159015a3070dd17ull -#define SHA384M_D 0x152fecd8f70e5939ull -#define SHA384M_E 0x67332667ffc00b31ull -#define SHA384M_F 0x8eb44a8768581511ull -#define SHA384M_G 0xdb0c2e0d64f98fa7ull -#define SHA384M_H 0x47b5481dbefa4fa4ull - -#define SHA384C00 0x428a2f98d728ae22ull -#define SHA384C01 0x7137449123ef65cdull -#define SHA384C02 0xb5c0fbcfec4d3b2full -#define SHA384C03 0xe9b5dba58189dbbcull -#define SHA384C04 0x3956c25bf348b538ull -#define SHA384C05 0x59f111f1b605d019ull -#define SHA384C06 0x923f82a4af194f9bull -#define SHA384C07 0xab1c5ed5da6d8118ull -#define SHA384C08 0xd807aa98a3030242ull -#define SHA384C09 0x12835b0145706fbeull -#define SHA384C0a 0x243185be4ee4b28cull -#define SHA384C0b 0x550c7dc3d5ffb4e2ull -#define SHA384C0c 0x72be5d74f27b896full -#define SHA384C0d 0x80deb1fe3b1696b1ull -#define SHA384C0e 0x9bdc06a725c71235ull -#define SHA384C0f 0xc19bf174cf692694ull -#define SHA384C10 0xe49b69c19ef14ad2ull -#define SHA384C11 0xefbe4786384f25e3ull -#define SHA384C12 0x0fc19dc68b8cd5b5ull -#define SHA384C13 0x240ca1cc77ac9c65ull -#define SHA384C14 0x2de92c6f592b0275ull -#define SHA384C15 0x4a7484aa6ea6e483ull -#define SHA384C16 0x5cb0a9dcbd41fbd4ull -#define SHA384C17 0x76f988da831153b5ull -#define SHA384C18 0x983e5152ee66dfabull -#define SHA384C19 0xa831c66d2db43210ull -#define SHA384C1a 0xb00327c898fb213full -#define SHA384C1b 0xbf597fc7beef0ee4ull -#define SHA384C1c 0xc6e00bf33da88fc2ull -#define SHA384C1d 0xd5a79147930aa725ull -#define SHA384C1e 0x06ca6351e003826full -#define SHA384C1f 0x142929670a0e6e70ull -#define SHA384C20 0x27b70a8546d22ffcull -#define SHA384C21 0x2e1b21385c26c926ull -#define SHA384C22 0x4d2c6dfc5ac42aedull -#define SHA384C23 0x53380d139d95b3dfull -#define SHA384C24 0x650a73548baf63deull -#define SHA384C25 
0x766a0abb3c77b2a8ull -#define SHA384C26 0x81c2c92e47edaee6ull -#define SHA384C27 0x92722c851482353bull -#define SHA384C28 0xa2bfe8a14cf10364ull -#define SHA384C29 0xa81a664bbc423001ull -#define SHA384C2a 0xc24b8b70d0f89791ull -#define SHA384C2b 0xc76c51a30654be30ull -#define SHA384C2c 0xd192e819d6ef5218ull -#define SHA384C2d 0xd69906245565a910ull -#define SHA384C2e 0xf40e35855771202aull -#define SHA384C2f 0x106aa07032bbd1b8ull -#define SHA384C30 0x19a4c116b8d2d0c8ull -#define SHA384C31 0x1e376c085141ab53ull -#define SHA384C32 0x2748774cdf8eeb99ull -#define SHA384C33 0x34b0bcb5e19b48a8ull -#define SHA384C34 0x391c0cb3c5c95a63ull -#define SHA384C35 0x4ed8aa4ae3418acbull -#define SHA384C36 0x5b9cca4f7763e373ull -#define SHA384C37 0x682e6ff3d6b2b8a3ull -#define SHA384C38 0x748f82ee5defb2fcull -#define SHA384C39 0x78a5636f43172f60ull -#define SHA384C3a 0x84c87814a1f0ab72ull -#define SHA384C3b 0x8cc702081a6439ecull -#define SHA384C3c 0x90befffa23631e28ull -#define SHA384C3d 0xa4506cebde82bde9ull -#define SHA384C3e 0xbef9a3f7b2c67915ull -#define SHA384C3f 0xc67178f2e372532bull -#define SHA384C40 0xca273eceea26619cull -#define SHA384C41 0xd186b8c721c0c207ull -#define SHA384C42 0xeada7dd6cde0eb1eull -#define SHA384C43 0xf57d4f7fee6ed178ull -#define SHA384C44 0x06f067aa72176fbaull -#define SHA384C45 0x0a637dc5a2c898a6ull -#define SHA384C46 0x113f9804bef90daeull -#define SHA384C47 0x1b710b35131c471bull -#define SHA384C48 0x28db77f523047d84ull -#define SHA384C49 0x32caab7b40c72493ull -#define SHA384C4a 0x3c9ebe0a15c9bebcull -#define SHA384C4b 0x431d67c49c100d4cull -#define SHA384C4c 0x4cc5d4becb3e42b6ull -#define SHA384C4d 0x597f299cfc657e2aull -#define SHA384C4e 0x5fcb6fab3ad6faecull -#define SHA384C4f 0x6c44198c4a475817ull +#define SHA384M_A 0xcbbb9d5dc1059ed8 +#define SHA384M_B 0x629a292a367cd507 +#define SHA384M_C 0x9159015a3070dd17 +#define SHA384M_D 0x152fecd8f70e5939 +#define SHA384M_E 0x67332667ffc00b31 +#define SHA384M_F 0x8eb44a8768581511 +#define SHA384M_G 
0xdb0c2e0d64f98fa7 +#define SHA384M_H 0x47b5481dbefa4fa4 + +#define SHA384C00 0x428a2f98d728ae22 +#define SHA384C01 0x7137449123ef65cd +#define SHA384C02 0xb5c0fbcfec4d3b2f +#define SHA384C03 0xe9b5dba58189dbbc +#define SHA384C04 0x3956c25bf348b538 +#define SHA384C05 0x59f111f1b605d019 +#define SHA384C06 0x923f82a4af194f9b +#define SHA384C07 0xab1c5ed5da6d8118 +#define SHA384C08 0xd807aa98a3030242 +#define SHA384C09 0x12835b0145706fbe +#define SHA384C0a 0x243185be4ee4b28c +#define SHA384C0b 0x550c7dc3d5ffb4e2 +#define SHA384C0c 0x72be5d74f27b896f +#define SHA384C0d 0x80deb1fe3b1696b1 +#define SHA384C0e 0x9bdc06a725c71235 +#define SHA384C0f 0xc19bf174cf692694 +#define SHA384C10 0xe49b69c19ef14ad2 +#define SHA384C11 0xefbe4786384f25e3 +#define SHA384C12 0x0fc19dc68b8cd5b5 +#define SHA384C13 0x240ca1cc77ac9c65 +#define SHA384C14 0x2de92c6f592b0275 +#define SHA384C15 0x4a7484aa6ea6e483 +#define SHA384C16 0x5cb0a9dcbd41fbd4 +#define SHA384C17 0x76f988da831153b5 +#define SHA384C18 0x983e5152ee66dfab +#define SHA384C19 0xa831c66d2db43210 +#define SHA384C1a 0xb00327c898fb213f +#define SHA384C1b 0xbf597fc7beef0ee4 +#define SHA384C1c 0xc6e00bf33da88fc2 +#define SHA384C1d 0xd5a79147930aa725 +#define SHA384C1e 0x06ca6351e003826f +#define SHA384C1f 0x142929670a0e6e70 +#define SHA384C20 0x27b70a8546d22ffc +#define SHA384C21 0x2e1b21385c26c926 +#define SHA384C22 0x4d2c6dfc5ac42aed +#define SHA384C23 0x53380d139d95b3df +#define SHA384C24 0x650a73548baf63de +#define SHA384C25 0x766a0abb3c77b2a8 +#define SHA384C26 0x81c2c92e47edaee6 +#define SHA384C27 0x92722c851482353b +#define SHA384C28 0xa2bfe8a14cf10364 +#define SHA384C29 0xa81a664bbc423001 +#define SHA384C2a 0xc24b8b70d0f89791 +#define SHA384C2b 0xc76c51a30654be30 +#define SHA384C2c 0xd192e819d6ef5218 +#define SHA384C2d 0xd69906245565a910 +#define SHA384C2e 0xf40e35855771202a +#define SHA384C2f 0x106aa07032bbd1b8 +#define SHA384C30 0x19a4c116b8d2d0c8 +#define SHA384C31 0x1e376c085141ab53 +#define SHA384C32 0x2748774cdf8eeb99 
+#define SHA384C33 0x34b0bcb5e19b48a8 +#define SHA384C34 0x391c0cb3c5c95a63 +#define SHA384C35 0x4ed8aa4ae3418acb +#define SHA384C36 0x5b9cca4f7763e373 +#define SHA384C37 0x682e6ff3d6b2b8a3 +#define SHA384C38 0x748f82ee5defb2fc +#define SHA384C39 0x78a5636f43172f60 +#define SHA384C3a 0x84c87814a1f0ab72 +#define SHA384C3b 0x8cc702081a6439ec +#define SHA384C3c 0x90befffa23631e28 +#define SHA384C3d 0xa4506cebde82bde9 +#define SHA384C3e 0xbef9a3f7b2c67915 +#define SHA384C3f 0xc67178f2e372532b +#define SHA384C40 0xca273eceea26619c +#define SHA384C41 0xd186b8c721c0c207 +#define SHA384C42 0xeada7dd6cde0eb1e +#define SHA384C43 0xf57d4f7fee6ed178 +#define SHA384C44 0x06f067aa72176fba +#define SHA384C45 0x0a637dc5a2c898a6 +#define SHA384C46 0x113f9804bef90dae +#define SHA384C47 0x1b710b35131c471b +#define SHA384C48 0x28db77f523047d84 +#define SHA384C49 0x32caab7b40c72493 +#define SHA384C4a 0x3c9ebe0a15c9bebc +#define SHA384C4b 0x431d67c49c100d4c +#define SHA384C4c 0x4cc5d4becb3e42b6 +#define SHA384C4d 0x597f299cfc657e2a +#define SHA384C4e 0x5fcb6fab3ad6faec +#define SHA384C4f 0x6c44198c4a475817 #endif @@ -374,98 +374,98 @@ * SHA512 Constants (64 bits) */ -#define SHA512M_A 0x6a09e667f3bcc908ull -#define SHA512M_B 0xbb67ae8584caa73bull -#define SHA512M_C 0x3c6ef372fe94f82bull -#define SHA512M_D 0xa54ff53a5f1d36f1ull -#define SHA512M_E 0x510e527fade682d1ull -#define SHA512M_F 0x9b05688c2b3e6c1full -#define SHA512M_G 0x1f83d9abfb41bd6bull -#define SHA512M_H 0x5be0cd19137e2179ull - -#define SHA512C00 0x428a2f98d728ae22ull -#define SHA512C01 0x7137449123ef65cdull -#define SHA512C02 0xb5c0fbcfec4d3b2full -#define SHA512C03 0xe9b5dba58189dbbcull -#define SHA512C04 0x3956c25bf348b538ull -#define SHA512C05 0x59f111f1b605d019ull -#define SHA512C06 0x923f82a4af194f9bull -#define SHA512C07 0xab1c5ed5da6d8118ull -#define SHA512C08 0xd807aa98a3030242ull -#define SHA512C09 0x12835b0145706fbeull -#define SHA512C0a 0x243185be4ee4b28cull -#define SHA512C0b 0x550c7dc3d5ffb4e2ull -#define 
SHA512C0c 0x72be5d74f27b896full -#define SHA512C0d 0x80deb1fe3b1696b1ull -#define SHA512C0e 0x9bdc06a725c71235ull -#define SHA512C0f 0xc19bf174cf692694ull -#define SHA512C10 0xe49b69c19ef14ad2ull -#define SHA512C11 0xefbe4786384f25e3ull -#define SHA512C12 0x0fc19dc68b8cd5b5ull -#define SHA512C13 0x240ca1cc77ac9c65ull -#define SHA512C14 0x2de92c6f592b0275ull -#define SHA512C15 0x4a7484aa6ea6e483ull -#define SHA512C16 0x5cb0a9dcbd41fbd4ull -#define SHA512C17 0x76f988da831153b5ull -#define SHA512C18 0x983e5152ee66dfabull -#define SHA512C19 0xa831c66d2db43210ull -#define SHA512C1a 0xb00327c898fb213full -#define SHA512C1b 0xbf597fc7beef0ee4ull -#define SHA512C1c 0xc6e00bf33da88fc2ull -#define SHA512C1d 0xd5a79147930aa725ull -#define SHA512C1e 0x06ca6351e003826full -#define SHA512C1f 0x142929670a0e6e70ull -#define SHA512C20 0x27b70a8546d22ffcull -#define SHA512C21 0x2e1b21385c26c926ull -#define SHA512C22 0x4d2c6dfc5ac42aedull -#define SHA512C23 0x53380d139d95b3dfull -#define SHA512C24 0x650a73548baf63deull -#define SHA512C25 0x766a0abb3c77b2a8ull -#define SHA512C26 0x81c2c92e47edaee6ull -#define SHA512C27 0x92722c851482353bull -#define SHA512C28 0xa2bfe8a14cf10364ull -#define SHA512C29 0xa81a664bbc423001ull -#define SHA512C2a 0xc24b8b70d0f89791ull -#define SHA512C2b 0xc76c51a30654be30ull -#define SHA512C2c 0xd192e819d6ef5218ull -#define SHA512C2d 0xd69906245565a910ull -#define SHA512C2e 0xf40e35855771202aull -#define SHA512C2f 0x106aa07032bbd1b8ull -#define SHA512C30 0x19a4c116b8d2d0c8ull -#define SHA512C31 0x1e376c085141ab53ull -#define SHA512C32 0x2748774cdf8eeb99ull -#define SHA512C33 0x34b0bcb5e19b48a8ull -#define SHA512C34 0x391c0cb3c5c95a63ull -#define SHA512C35 0x4ed8aa4ae3418acbull -#define SHA512C36 0x5b9cca4f7763e373ull -#define SHA512C37 0x682e6ff3d6b2b8a3ull -#define SHA512C38 0x748f82ee5defb2fcull -#define SHA512C39 0x78a5636f43172f60ull -#define SHA512C3a 0x84c87814a1f0ab72ull -#define SHA512C3b 0x8cc702081a6439ecull -#define SHA512C3c 0x90befffa23631e28ull 
-#define SHA512C3d 0xa4506cebde82bde9ull -#define SHA512C3e 0xbef9a3f7b2c67915ull -#define SHA512C3f 0xc67178f2e372532bull -#define SHA512C40 0xca273eceea26619cull -#define SHA512C41 0xd186b8c721c0c207ull -#define SHA512C42 0xeada7dd6cde0eb1eull -#define SHA512C43 0xf57d4f7fee6ed178ull -#define SHA512C44 0x06f067aa72176fbaull -#define SHA512C45 0x0a637dc5a2c898a6ull -#define SHA512C46 0x113f9804bef90daeull -#define SHA512C47 0x1b710b35131c471bull -#define SHA512C48 0x28db77f523047d84ull -#define SHA512C49 0x32caab7b40c72493ull -#define SHA512C4a 0x3c9ebe0a15c9bebcull -#define SHA512C4b 0x431d67c49c100d4cull -#define SHA512C4c 0x4cc5d4becb3e42b6ull -#define SHA512C4d 0x597f299cfc657e2aull -#define SHA512C4e 0x5fcb6fab3ad6faecull -#define SHA512C4f 0x6c44198c4a475817ull - -#define SHA512REV0 0x5218a97a1b97e8a0ull -#define SHA512REV1 0x4334c1bea164f555ull +#define SHA512M_A 0x6a09e667f3bcc908 +#define SHA512M_B 0xbb67ae8584caa73b +#define SHA512M_C 0x3c6ef372fe94f82b +#define SHA512M_D 0xa54ff53a5f1d36f1 +#define SHA512M_E 0x510e527fade682d1 +#define SHA512M_F 0x9b05688c2b3e6c1f +#define SHA512M_G 0x1f83d9abfb41bd6b +#define SHA512M_H 0x5be0cd19137e2179 + +#define SHA512C00 0x428a2f98d728ae22 +#define SHA512C01 0x7137449123ef65cd +#define SHA512C02 0xb5c0fbcfec4d3b2f +#define SHA512C03 0xe9b5dba58189dbbc +#define SHA512C04 0x3956c25bf348b538 +#define SHA512C05 0x59f111f1b605d019 +#define SHA512C06 0x923f82a4af194f9b +#define SHA512C07 0xab1c5ed5da6d8118 +#define SHA512C08 0xd807aa98a3030242 +#define SHA512C09 0x12835b0145706fbe +#define SHA512C0a 0x243185be4ee4b28c +#define SHA512C0b 0x550c7dc3d5ffb4e2 +#define SHA512C0c 0x72be5d74f27b896f +#define SHA512C0d 0x80deb1fe3b1696b1 +#define SHA512C0e 0x9bdc06a725c71235 +#define SHA512C0f 0xc19bf174cf692694 +#define SHA512C10 0xe49b69c19ef14ad2 +#define SHA512C11 0xefbe4786384f25e3 +#define SHA512C12 0x0fc19dc68b8cd5b5 +#define SHA512C13 0x240ca1cc77ac9c65 +#define SHA512C14 0x2de92c6f592b0275 +#define SHA512C15 
0x4a7484aa6ea6e483 +#define SHA512C16 0x5cb0a9dcbd41fbd4 +#define SHA512C17 0x76f988da831153b5 +#define SHA512C18 0x983e5152ee66dfab +#define SHA512C19 0xa831c66d2db43210 +#define SHA512C1a 0xb00327c898fb213f +#define SHA512C1b 0xbf597fc7beef0ee4 +#define SHA512C1c 0xc6e00bf33da88fc2 +#define SHA512C1d 0xd5a79147930aa725 +#define SHA512C1e 0x06ca6351e003826f +#define SHA512C1f 0x142929670a0e6e70 +#define SHA512C20 0x27b70a8546d22ffc +#define SHA512C21 0x2e1b21385c26c926 +#define SHA512C22 0x4d2c6dfc5ac42aed +#define SHA512C23 0x53380d139d95b3df +#define SHA512C24 0x650a73548baf63de +#define SHA512C25 0x766a0abb3c77b2a8 +#define SHA512C26 0x81c2c92e47edaee6 +#define SHA512C27 0x92722c851482353b +#define SHA512C28 0xa2bfe8a14cf10364 +#define SHA512C29 0xa81a664bbc423001 +#define SHA512C2a 0xc24b8b70d0f89791 +#define SHA512C2b 0xc76c51a30654be30 +#define SHA512C2c 0xd192e819d6ef5218 +#define SHA512C2d 0xd69906245565a910 +#define SHA512C2e 0xf40e35855771202a +#define SHA512C2f 0x106aa07032bbd1b8 +#define SHA512C30 0x19a4c116b8d2d0c8 +#define SHA512C31 0x1e376c085141ab53 +#define SHA512C32 0x2748774cdf8eeb99 +#define SHA512C33 0x34b0bcb5e19b48a8 +#define SHA512C34 0x391c0cb3c5c95a63 +#define SHA512C35 0x4ed8aa4ae3418acb +#define SHA512C36 0x5b9cca4f7763e373 +#define SHA512C37 0x682e6ff3d6b2b8a3 +#define SHA512C38 0x748f82ee5defb2fc +#define SHA512C39 0x78a5636f43172f60 +#define SHA512C3a 0x84c87814a1f0ab72 +#define SHA512C3b 0x8cc702081a6439ec +#define SHA512C3c 0x90befffa23631e28 +#define SHA512C3d 0xa4506cebde82bde9 +#define SHA512C3e 0xbef9a3f7b2c67915 +#define SHA512C3f 0xc67178f2e372532b +#define SHA512C40 0xca273eceea26619c +#define SHA512C41 0xd186b8c721c0c207 +#define SHA512C42 0xeada7dd6cde0eb1e +#define SHA512C43 0xf57d4f7fee6ed178 +#define SHA512C44 0x06f067aa72176fba +#define SHA512C45 0x0a637dc5a2c898a6 +#define SHA512C46 0x113f9804bef90dae +#define SHA512C47 0x1b710b35131c471b +#define SHA512C48 0x28db77f523047d84 +#define SHA512C49 0x32caab7b40c72493 
+#define SHA512C4a 0x3c9ebe0a15c9bebc +#define SHA512C4b 0x431d67c49c100d4c +#define SHA512C4c 0x4cc5d4becb3e42b6 +#define SHA512C4d 0x597f299cfc657e2a +#define SHA512C4e 0x5fcb6fab3ad6faec +#define SHA512C4f 0x6c44198c4a475817 + +#define SHA512REV0 0x5218a97a1b97e8a0 +#define SHA512REV1 0x4334c1bea164f555 #endif diff --git a/include/ext_ADL.h b/include/ext_ADL.h index 82bad73..43a4aa0 100644 --- a/include/ext_ADL.h +++ b/include/ext_ADL.h @@ -14,7 +14,7 @@ typedef int bool; #include -typedef int HM_ADAPTER; +//typedef int HM_ADAPTER; #ifdef _POSIX void *GetProcAddress (void *pLibrary, const char *name); diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h index b67dc63..937d31f 100644 --- a/include/ext_OpenCL.h +++ b/include/ext_OpenCL.h @@ -24,8 +24,8 @@ void hc_clBuildProgram (cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); cl_mem hc_clCreateBuffer (cl_context context, cl_mem_flags flags, size_t size, void *host_ptr); -//cl_command_queue hc_clCreateCommandQueue (cl_context context, cl_device_id device, cl_command_queue_properties properties); -cl_command_queue hc_clCreateCommandQueueWithProperties (cl_context context, cl_device_id device, const cl_queue_properties *properties); +cl_command_queue hc_clCreateCommandQueue (cl_context context, cl_device_id device, cl_command_queue_properties properties); +//cl_command_queue hc_clCreateCommandQueueWithProperties (cl_context context, cl_device_id device, const cl_queue_properties *properties); cl_context hc_clCreateContext (cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void *user_data); cl_kernel hc_clCreateKernel (cl_program program, const char *kernel_name); cl_program hc_clCreateProgramWithSource (cl_context context, cl_uint count, const char **strings, const 
size_t *lengths); diff --git a/include/ext_nvapi.h b/include/ext_nvapi.h index 0d7a8fd..7d7df11 100644 --- a/include/ext_nvapi.h +++ b/include/ext_nvapi.h @@ -54,7 +54,7 @@ #include -typedef NvPhysicalGpuHandle HM_ADAPTER; +//typedef NvPhysicalGpuHandle HM_ADAPTER; int hc_NvAPI_EnumPhysicalGPUs (NvPhysicalGpuHandle nvGPUHandle[NVAPI_MAX_PHYSICAL_GPUS], NvU32 *pGpuCount); int hc_NvAPI_GPU_GetThermalSettings (NvPhysicalGpuHandle hPhysicalGpu, NvU32 sensorIndex, NV_GPU_THERMAL_SETTINGS *pThermalSettings); diff --git a/include/ext_nvml.h b/include/ext_nvml.h index fb69097..674754a 100644 --- a/include/ext_nvml.h +++ b/include/ext_nvml.h @@ -10,7 +10,7 @@ #include -typedef nvmlDevice_t HM_ADAPTER; +//typedef nvmlDevice_t HM_ADAPTER; nvmlReturn_t hc_NVML_nvmlInit (void); nvmlReturn_t hc_NVML_nvmlShutdown (void); diff --git a/include/kernel_functions.c b/include/kernel_functions.c index 6d1989b..9654409 100644 --- a/include/kernel_functions.c +++ b/include/kernel_functions.c @@ -233,7 +233,7 @@ #define SHA384_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \ { \ - u64x temp0; \ + u64 temp0; \ temp0 = K; \ temp0 += x; \ temp0 += h; \ @@ -273,7 +273,7 @@ #define SHA512_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \ { \ - u64x temp0; \ + u64 temp0; \ temp0 = K; \ temp0 += x; \ temp0 += h; \ diff --git a/include/kernel_vendor.h b/include/kernel_vendor.h index ca76828..3c0b513 100644 --- a/include/kernel_vendor.h +++ b/include/kernel_vendor.h @@ -3,15 +3,17 @@ * License.....: MIT */ +#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + /** * vendor specific */ -#ifdef __GPU__ +#if VENDOR_ID == 4096 #define IS_AMD #endif -#ifdef __CUDACC__ +#if VENDOR_ID == 4318 #define IS_NV #endif @@ -19,92 +21,14 @@ * AMD specific */ -/* -#ifdef IS_AMD -#ifdef __ATI_RV710__ -#define VLIW1 -#elif __ATI_RV730__ -#define VLIW1 -#elif __ATI_RV770__ -#define VLIW4 -#elif __Barts__ -#define VLIW5 -#elif __BeaverCreek__ -#define VLIW5 -#elif __Caicos__ -#define VLIW5 -#elif __Capeverde__ -#define VLIW1 -#elif 
__Cayman__ -#define VLIW4 -#elif __Cedar__ -#define VLIW5 -#elif __Cypress__ -#define VLIW5 -#elif __Devastator__ -#define VLIW4 -#elif __Juniper__ -#define VLIW5 -#elif __Loveland__ -#define VLIW5 -#elif __Pitcairn__ -#define VLIW1 -#elif __Redwood__ -#define VLIW5 -#elif __Tahiti__ -#define VLIW1 -#elif __Turks__ -#define VLIW5 -#elif __Scrapper__ -#define VLIW4 -#elif __WinterPark__ -#define VLIW5 -#endif -#endif -*/ - #ifdef IS_AMD - -#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable - -#ifdef OSX - -#else - -#ifdef cl_amd_media_ops #pragma OPENCL EXTENSION cl_amd_media_ops : enable -#endif - -#ifdef cl_amd_media_ops2 #pragma OPENCL EXTENSION cl_amd_media_ops2 : enable #endif -#endif - -#endif +/** + * NV specific + */ #ifdef IS_NV -#ifdef sm_10 -#define VLIW1 -#elif sm_11 -#define VLIW1 -#elif sm_12 -#define VLIW1 -#elif sm_13 -#define VLIW1 -#elif sm_20 -#define VLIW1 -#elif sm_21 -#define VLIW2 -#elif sm_30 -#define VLIW2 -#elif sm_35 -#define VLIW2 -#elif sm_37 -#define VLIW2 -#elif sm_50 -#define VLIW2 -#elif sm_52 -#define VLIW2 -#endif #endif diff --git a/include/shared.h b/include/shared.h index 22379ec..4942091 100644 --- a/include/shared.h +++ b/include/shared.h @@ -54,39 +54,26 @@ #define hc_sleep(x) sleep ((x)); #endif -#ifdef _CUDA -#include -#elif _OCL #include -#endif /** * temperature management */ #ifdef LINUX -#ifdef _CUDA #include -#elif _OCL #include #endif -#endif #ifdef WIN -#ifdef _CUDA #include -#elif _OCL #include #endif -#endif #ifdef OSX -#ifdef _CUDA #include -#elif _OCL #include #endif -#endif /** * shared stuff @@ -97,11 +84,16 @@ #define DEVICES_MAX 128 #define CL_PLATFORMS_MAX 16 + #define CL_VENDOR_NV "NVIDIA Corporation" #define CL_VENDOR_AMD "Advanced Micro Devices, Inc." 
#define CL_VENDOR_SDS "Shiloh Distributed Solutions" #define CL_VENDOR_APPLE "Apple" +#define VENDOR_ID_AMD 4098 +#define VENDOR_ID_NV 4318 +#define VENDOR_ID_UNKNOWN 0 + #define BLOCK_SIZE 64 #define CHARSIZ 0x100 @@ -118,9 +110,6 @@ #define LOOPBACK_FILE "loopback" -#define VENDOR_ID_AMD 4098 -#define VENDOR_ID_NV 4318 - /** * types */ @@ -1769,7 +1758,6 @@ extern hc_thread_mutex_t mux_display; #define OPTI_TYPE_SINGLE_HASH (1 << 11) #define OPTI_TYPE_SINGLE_SALT (1 << 12) #define OPTI_TYPE_BRUTE_FORCE (1 << 13) -#define OPTI_TYPE_SCALAR_MODE (1 << 14) #define OPTI_TYPE_RAW_HASH (1 << 15) #define OPTI_STR_ZERO_BYTE "Zero-Byte" @@ -1785,7 +1773,6 @@ extern hc_thread_mutex_t mux_display; #define OPTI_STR_SINGLE_HASH "Single-Hash" #define OPTI_STR_SINGLE_SALT "Single-Salt" #define OPTI_STR_BRUTE_FORCE "Brute-Force" -#define OPTI_STR_SCALAR_MODE "Scalar-Mode" #define OPTI_STR_RAW_HASH "Raw-Hash" /** @@ -2022,11 +2009,9 @@ void logfile_append (const char *fmt, ...); void fsync (int fd); #endif -#ifdef _CUDA +/* int hm_get_adapter_index (HM_ADAPTER nvGPUHandle[DEVICES_MAX]); -#endif -#ifdef _OCL int get_adapters_num (HM_LIB hm_dll, int *iNumberAdapters); int hm_get_device_num (HM_LIB hm_dll, HM_ADAPTER hm_adapter_index, int *hm_device_num); @@ -2045,13 +2030,13 @@ int hm_check_fanspeed_control (HM_LIB hm_dll, hm_attrs_t *hm_device, uint32_t *v void hm_close (HM_LIB hm_dll); HM_LIB hm_init (); -#endif int hm_get_temperature_with_device_id (const uint device_id); int hm_get_fanspeed_with_device_id (const uint device_id); int hm_get_utilization_with_device_id (const uint device_id); int hm_set_fanspeed_with_device_id (const uint device_id, const int fanspeed); +*/ void myabort (); void myquit (); diff --git a/include/types.h b/include/types.h index 309398a..ba5d462 100644 --- a/include/types.h +++ b/include/types.h @@ -812,10 +812,12 @@ struct __hc_device_param { uint device_id; + uint sm_major; + uint sm_minor; + uint gpu_processors; uint gpu_threads; uint gpu_accel; - 
uint gpu_vector_width; uint64_t gpu_maxmem_alloc; uint gpu_power; // these both are based on their _user counterpart uint gpu_blocks; // but are modified by autotuner and used inside crack loops @@ -833,8 +835,6 @@ struct __hc_device_param uint size_results; uint size_plains; - uint vect_size; - uint (*pw_add) (struct __hc_device_param *, const uint8_t *, const uint); void (*pw_transpose) (const pw_t *, pw_t *); @@ -870,68 +870,6 @@ struct __hc_device_param // device specific attributes starting - #ifdef _CUDA - - int sm_major; - int sm_minor; - - CUdevice device; - - CUfunction function1; - CUfunction function12; - CUfunction function2; - CUfunction function23; - CUfunction function3; - CUfunction function_mp; - CUfunction function_mp_l; - CUfunction function_mp_r; - CUfunction function_amp; - CUfunction function_tb; - CUfunction function_tm; - - CUcontext context; - CUmodule module; - CUmodule module_mp; - CUmodule module_amp; - CUstream stream; - - CUdeviceptr d_pws_buf; - CUdeviceptr d_pws_amp_buf; - CUdeviceptr d_words_buf_l; - CUdeviceptr d_words_buf_r; - CUdeviceptr c_words_buf_r; - CUdeviceptr d_rules; - CUdeviceptr c_rules; - CUdeviceptr d_combs; - CUdeviceptr c_combs; - CUdeviceptr d_bfs; - CUdeviceptr c_bfs; - CUdeviceptr d_tm; - CUdeviceptr c_tm; - size_t c_bytes; - CUdeviceptr d_bitmap_s1_a; - CUdeviceptr d_bitmap_s1_b; - CUdeviceptr d_bitmap_s1_c; - CUdeviceptr d_bitmap_s1_d; - CUdeviceptr d_bitmap_s2_a; - CUdeviceptr d_bitmap_s2_b; - CUdeviceptr d_bitmap_s2_c; - CUdeviceptr d_bitmap_s2_d; - CUdeviceptr d_plain_bufs; - CUdeviceptr d_digests_buf; - CUdeviceptr d_digests_shown; - CUdeviceptr d_salt_bufs; - CUdeviceptr d_esalt_bufs; - CUdeviceptr d_bcrypt_bufs; - CUdeviceptr d_tmps; - CUdeviceptr d_hooks; - CUdeviceptr d_result; - CUdeviceptr d_scryptV_buf; - CUdeviceptr d_root_css_buf; - CUdeviceptr d_markov_css_buf; - - #elif _OCL - char *device_name; char *device_version; char *driver_version; @@ -990,8 +928,6 @@ struct __hc_device_param cl_mem 
d_root_css_buf; cl_mem d_markov_css_buf; - #endif - #define PARAMCNT 32 void *kernel_params[PARAMCNT]; @@ -1021,11 +957,9 @@ typedef struct __hc_device_param hc_device_param_t; typedef struct { - HM_ADAPTER adapter_index; +// HM_ADAPTER adapter_index; - #ifdef _OCL int od_version; - #endif int fan_supported; @@ -1040,6 +974,8 @@ typedef struct * threads */ + uint vendor_id; + uint devices_status; uint devices_cnt; hc_device_param_t *devices_param; diff --git a/nv/amp_a0_v1.cu b/nv/amp_a0_v1.cu deleted file mode 100644 index cff9dd3..0000000 --- a/nv/amp_a0_v1.cu +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE1 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_nv.c" - -__device__ static u32x swap_workaround (const u32x v) -{ - #if __CUDA_ARCH__ >= 200 - return __byte_perm (v, 0, 0x0123); - #else - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); - #endif -} - -#include "include/rp_gpu.h" -#include "rp_nv.c" - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - const u32 out_len = apply_rules (c_rules[0].cmds, w0, w1, pw_len); - - pws_amp[gid].i[0] = w0[0]; - pws_amp[gid].i[1] = w0[1]; - pws_amp[gid].i[2] = w0[2]; - pws_amp[gid].i[3] = w0[3]; - pws_amp[gid].i[4] = w1[0]; - pws_amp[gid].i[5] = w1[1]; - pws_amp[gid].i[6] = w1[2]; - pws_amp[gid].i[7] = w1[3]; - - pws_amp[gid].pw_len = 
out_len; -} diff --git a/nv/amp_a0_v2.cu b/nv/amp_a0_v2.cu deleted file mode 100644 index a6a01d7..0000000 --- a/nv/amp_a0_v2.cu +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE2 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_nv.c" - -__device__ static u32x swap_workaround (const u32x v) -{ - #if __CUDA_ARCH__ >= 200 - return __byte_perm (v, 0, 0x0123); - #else - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); - #endif -} - -#include "include/rp_gpu.h" -#include "rp_nv.c" - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - const u32 out_len = apply_rules (c_rules[0].cmds, w0, w1, pw_len); - - pws_amp[gid].i[0] = w0[0]; - pws_amp[gid].i[1] = w0[1]; - pws_amp[gid].i[2] = w0[2]; - pws_amp[gid].i[3] = w0[3]; - pws_amp[gid].i[4] = w1[0]; - pws_amp[gid].i[5] = w1[1]; - pws_amp[gid].i[6] = w1[2]; - pws_amp[gid].i[7] = w1[3]; - - pws_amp[gid].pw_len = out_len; -} diff --git a/nv/amp_a0_v4.cu b/nv/amp_a0_v4.cu deleted file mode 100644 index 1f4a0c0..0000000 --- a/nv/amp_a0_v4.cu +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "include/kernel_vendor.h" -#include "types_nv.c" - -__device__ static u32x swap_workaround (const u32x v) -{ - #if __CUDA_ARCH__ >= 200 - return 
__byte_perm (v, 0, 0x0123); - #else - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); - #endif -} - -#include "include/rp_gpu.h" -#include "rp_nv.c" - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - const u32 out_len = apply_rules (c_rules[0].cmds, w0, w1, pw_len); - - pws_amp[gid].i[0] = w0[0]; - pws_amp[gid].i[1] = w0[1]; - pws_amp[gid].i[2] = w0[2]; - pws_amp[gid].i[3] = w0[3]; - pws_amp[gid].i[4] = w1[0]; - pws_amp[gid].i[5] = w1[1]; - pws_amp[gid].i[6] = w1[2]; - pws_amp[gid].i[7] = w1[3]; - - pws_amp[gid].pw_len = out_len; -} diff --git a/nv/amp_a1_v1.cu b/nv/amp_a1_v1.cu deleted file mode 100644 index 4d2ace7..0000000 --- a/nv/amp_a1_v1.cu +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE1 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (offset % 4); - - int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[0], w3[1], selector); - w3[0] = __byte_perm (w2[3], w3[0], selector); - w2[3] = __byte_perm (w2[2], w2[3], selector); - w2[2] = __byte_perm (w2[1], w2[2], selector); - w2[1] = __byte_perm (w2[0], w2[1], selector); - w2[0] = __byte_perm (w1[3], 
w2[0], selector); - w1[3] = __byte_perm (w1[2], w1[3], selector); - w1[2] = __byte_perm (w1[1], w1[2], selector); - w1[1] = __byte_perm (w1[0], w1[1], selector); - w1[0] = __byte_perm (w0[3], w1[0], selector); - w0[3] = __byte_perm (w0[2], w0[3], selector); - w0[2] = __byte_perm (w0[1], w0[2], selector); - w0[1] = __byte_perm (w0[0], w0[1], selector); - w0[0] = __byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[1] = __byte_perm (w2[3], w3[0], selector); - w3[0] = __byte_perm (w2[2], w2[3], selector); - w2[3] = __byte_perm (w2[1], w2[2], selector); - w2[2] = __byte_perm (w2[0], w2[1], selector); - w2[1] = __byte_perm (w1[3], w2[0], selector); - w2[0] = __byte_perm (w1[2], w1[3], selector); - w1[3] = __byte_perm (w1[1], w1[2], selector); - w1[2] = __byte_perm (w1[0], w1[1], selector); - w1[1] = __byte_perm (w0[3], w1[0], selector); - w1[0] = __byte_perm (w0[2], w0[3], selector); - w0[3] = __byte_perm (w0[1], w0[2], selector); - w0[2] = __byte_perm (w0[0], w0[1], selector); - w0[1] = __byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[1] = __byte_perm (w2[2], w2[3], selector); - w3[0] = __byte_perm (w2[1], w2[2], selector); - w2[3] = __byte_perm (w2[0], w2[1], selector); - w2[2] = __byte_perm (w1[3], w2[0], selector); - w2[1] = __byte_perm (w1[2], w1[3], selector); - w2[0] = __byte_perm (w1[1], w1[2], selector); - w1[3] = __byte_perm (w1[0], w1[1], selector); - w1[2] = __byte_perm (w0[3], w1[0], selector); - w1[1] = __byte_perm (w0[2], w0[3], selector); - w1[0] = __byte_perm (w0[1], w0[2], selector); - w0[3] = __byte_perm (w0[0], w0[1], selector); - w0[2] = __byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[1] = __byte_perm (w2[1], w2[2], selector); - w3[0] = __byte_perm (w2[0], w2[1], selector); - w2[3] = __byte_perm (w1[3], w2[0], selector); - w2[2] = __byte_perm (w1[2], w1[3], selector); - w2[1] = __byte_perm (w1[1], w1[2], selector); - w2[0] = __byte_perm (w1[0], w1[1], selector); - w1[3] = 
__byte_perm (w0[3], w1[0], selector); - w1[2] = __byte_perm (w0[2], w0[3], selector); - w1[1] = __byte_perm (w0[1], w0[2], selector); - w1[0] = __byte_perm (w0[0], w0[1], selector); - w0[3] = __byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[1] = __byte_perm (w2[0], w2[1], selector); - w3[0] = __byte_perm (w1[3], w2[0], selector); - w2[3] = __byte_perm (w1[2], w1[3], selector); - w2[2] = __byte_perm (w1[1], w1[2], selector); - w2[1] = __byte_perm (w1[0], w1[1], selector); - w2[0] = __byte_perm (w0[3], w1[0], selector); - w1[3] = __byte_perm (w0[2], w0[3], selector); - w1[2] = __byte_perm (w0[1], w0[2], selector); - w1[1] = __byte_perm (w0[0], w0[1], selector); - w1[0] = __byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[1] = __byte_perm (w1[3], w2[0], selector); - w3[0] = __byte_perm (w1[2], w1[3], selector); - w2[3] = __byte_perm (w1[1], w1[2], selector); - w2[2] = __byte_perm (w1[0], w1[1], selector); - w2[1] = __byte_perm (w0[3], w1[0], selector); - w2[0] = __byte_perm (w0[2], w0[3], selector); - w1[3] = __byte_perm (w0[1], w0[2], selector); - w1[2] = __byte_perm (w0[0], w0[1], selector); - w1[1] = __byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[1] = __byte_perm (w1[2], w1[3], selector); - w3[0] = __byte_perm (w1[1], w1[2], selector); - w2[3] = __byte_perm (w1[0], w1[1], selector); - w2[2] = __byte_perm (w0[3], w1[0], selector); - w2[1] = __byte_perm (w0[2], w0[3], selector); - w2[0] = __byte_perm (w0[1], w0[2], selector); - w1[3] = __byte_perm (w0[0], w0[1], selector); - w1[2] = __byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[1] = __byte_perm (w1[1], w1[2], selector); - w3[0] = __byte_perm (w1[0], w1[1], selector); - w2[3] = __byte_perm (w0[3], w1[0], selector); - w2[2] = 
__byte_perm (w0[2], w0[3], selector); - w2[1] = __byte_perm (w0[1], w0[2], selector); - w2[0] = __byte_perm (w0[0], w0[1], selector); - w1[3] = __byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[1] = __byte_perm (w1[0], w1[1], selector); - w3[0] = __byte_perm (w0[3], w1[0], selector); - w2[3] = __byte_perm (w0[2], w0[3], selector); - w2[2] = __byte_perm (w0[1], w0[2], selector); - w2[1] = __byte_perm (w0[0], w0[1], selector); - w2[0] = __byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[1] = __byte_perm (w0[3], w1[0], selector); - w3[0] = __byte_perm (w0[2], w0[3], selector); - w2[3] = __byte_perm (w0[1], w0[2], selector); - w2[2] = __byte_perm (w0[0], w0[1], selector); - w2[1] = __byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[1] = __byte_perm (w0[2], w0[3], selector); - w3[0] = __byte_perm (w0[1], w0[2], selector); - w2[3] = __byte_perm (w0[0], w0[1], selector); - w2[2] = __byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[1] = __byte_perm (w0[1], w0[2], selector); - w3[0] = __byte_perm (w0[0], w0[1], selector); - w2[3] = __byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[1] = __byte_perm (w0[0], w0[1], selector); - w3[0] = __byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - 
case 13: - w3[1] = __byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - - #else - - u32 tmp0[4]; - u32 tmp1[4]; - u32 tmp2[1]; - - switch (offset % 4) - { - case 0: - tmp0[0] = w0[0]; - tmp0[1] = w0[1]; - tmp0[2] = w0[2]; - tmp0[3] = w0[3]; - tmp1[0] = w1[0]; - tmp1[1] = w1[1]; - tmp1[2] = w1[2]; - tmp1[3] = w1[3]; - tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; - } - - switch (offset / 4) - { - case 0: - w0[0] = tmp0[0]; - w0[1] = tmp0[1]; - w0[2] = tmp0[2]; - w0[3] = tmp0[3]; - w1[0] = tmp1[0]; - w1[1] = tmp1[1]; - w1[2] = tmp1[2]; - w1[3] = tmp1[3]; - w2[0] = tmp2[0]; - break; - - case 1: - w0[0] = 0; - w0[1] = tmp0[0]; - w0[2] = tmp0[1]; - w0[3] = tmp0[2]; - w1[0] = tmp0[3]; - w1[1] = tmp1[0]; - w1[2] = tmp1[1]; - w1[3] = tmp1[2]; - w2[0] = tmp1[3]; - w2[1] = tmp2[0]; - break; - - case 2: - w0[0] = 0; - w0[1] = 0; - w0[2] = 
tmp0[0]; - w0[3] = tmp0[1]; - w1[0] = tmp0[2]; - w1[1] = tmp0[3]; - w1[2] = tmp1[0]; - w1[3] = tmp1[1]; - w2[0] = tmp1[2]; - w2[1] = tmp1[3]; - w2[2] = tmp2[0]; - break; - - case 3: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = tmp0[0]; - w1[0] = tmp0[1]; - w1[1] = tmp0[2]; - w1[2] = tmp0[3]; - w1[3] = tmp1[0]; - w2[0] = tmp1[1]; - w2[1] = tmp1[2]; - w2[2] = tmp1[3]; - w2[3] = tmp2[0]; - break; - - case 4: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = tmp0[0]; - w1[1] = tmp0[1]; - w1[2] = tmp0[2]; - w1[3] = tmp0[3]; - w2[0] = tmp1[0]; - w2[1] = tmp1[1]; - w2[2] = tmp1[2]; - w2[3] = tmp1[3]; - w3[0] = tmp2[0]; - break; - - case 5: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = tmp0[0]; - w1[2] = tmp0[1]; - w1[3] = tmp0[2]; - w2[0] = tmp0[3]; - w2[1] = tmp1[0]; - w2[2] = tmp1[1]; - w2[3] = tmp1[2]; - w3[0] = tmp1[3]; - w3[1] = tmp2[0]; - break; - - case 6: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = tmp0[0]; - w1[3] = tmp0[1]; - w2[0] = tmp0[2]; - w2[1] = tmp0[3]; - w2[2] = tmp1[0]; - w2[3] = tmp1[1]; - w3[0] = tmp1[2]; - w3[1] = tmp1[3]; - w3[2] = tmp2[0]; - break; - - case 7: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = tmp0[0]; - w2[0] = tmp0[1]; - w2[1] = tmp0[2]; - w2[2] = tmp0[3]; - w2[3] = tmp1[0]; - w3[0] = tmp1[1]; - w3[1] = tmp1[2]; - w3[2] = tmp1[3]; - w3[3] = tmp2[0]; - break; - - case 8: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = tmp0[0]; - w2[1] = tmp0[1]; - w2[2] = tmp0[2]; - w2[3] = tmp0[3]; - w3[0] = tmp1[0]; - w3[1] = tmp1[1]; - w3[2] = tmp1[2]; - w3[3] = tmp1[3]; - break; - - case 9: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = tmp0[0]; - w2[2] = tmp0[1]; - w2[3] = tmp0[2]; - w3[0] = tmp0[3]; - w3[1] = tmp1[0]; - w3[2] = tmp1[1]; - w3[3] = tmp1[2]; - 
break; - - case 10: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = tmp0[0]; - w2[3] = tmp0[1]; - w3[0] = tmp0[2]; - w3[1] = tmp0[3]; - w3[2] = tmp1[0]; - w3[3] = tmp1[1]; - break; - - case 11: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = tmp0[0]; - w3[0] = tmp0[1]; - w3[1] = tmp0[2]; - w3[2] = tmp0[3]; - w3[3] = tmp1[0]; - break; - - case 12: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = tmp0[0]; - w3[1] = tmp0[1]; - w3[2] = tmp0[2]; - w3[3] = tmp0[3]; - break; - - case 13: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = tmp0[0]; - w3[2] = tmp0[1]; - w3[3] = tmp0[2]; - break; - - } - - #endif -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - u32 wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32 wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32 wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32 wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_r_len = c_combs[0].pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[0].i[0]; - wordr0[1] = 
c_combs[0].i[1]; - wordr0[2] = c_combs[0].i[2]; - wordr0[3] = c_combs[0].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[0].i[4]; - wordr1[1] = c_combs[0].i[5]; - wordr1[2] = c_combs[0].i[6]; - wordr1[3] = c_combs[0].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, pw_r_len); - } - - u32 w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32 w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32 w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32 w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_len = pw_l_len + pw_r_len; - - pws_amp[gid].i[ 0] = w0[0]; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/amp_a1_v2.cu b/nv/amp_a1_v2.cu deleted file mode 100644 index f000471..0000000 --- a/nv/amp_a1_v2.cu +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Author......: Jens Steube 
- * License.....: MIT - */ - -#define VECT_SIZE2 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (offset % 4); - - int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[0], w3[1], selector); - w3[0] = __byte_perm (w2[3], w3[0], selector); - w2[3] = __byte_perm (w2[2], w2[3], selector); - w2[2] = __byte_perm (w2[1], w2[2], selector); - w2[1] = __byte_perm (w2[0], w2[1], selector); - w2[0] = __byte_perm (w1[3], w2[0], selector); - w1[3] = __byte_perm (w1[2], w1[3], selector); - w1[2] = __byte_perm (w1[1], w1[2], selector); - w1[1] = __byte_perm (w1[0], w1[1], selector); - w1[0] = __byte_perm (w0[3], w1[0], selector); - w0[3] = __byte_perm (w0[2], w0[3], selector); - w0[2] = __byte_perm (w0[1], w0[2], selector); - w0[1] = __byte_perm (w0[0], w0[1], selector); - w0[0] = __byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[1] = __byte_perm (w2[3], w3[0], selector); - w3[0] = __byte_perm (w2[2], w2[3], selector); - w2[3] = __byte_perm (w2[1], w2[2], selector); - w2[2] = __byte_perm (w2[0], w2[1], selector); - w2[1] = __byte_perm (w1[3], w2[0], selector); - w2[0] = __byte_perm (w1[2], w1[3], selector); - w1[3] = __byte_perm (w1[1], w1[2], selector); - w1[2] = __byte_perm (w1[0], w1[1], selector); - w1[1] = __byte_perm (w0[3], w1[0], selector); - w1[0] = __byte_perm (w0[2], w0[3], selector); - w0[3] = __byte_perm (w0[1], w0[2], selector); - w0[2] = __byte_perm (w0[0], w0[1], selector); - w0[1] = __byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[1] = __byte_perm (w2[2], w2[3], selector); - w3[0] = __byte_perm (w2[1], w2[2], selector); - w2[3] = __byte_perm (w2[0], w2[1], selector); - w2[2] = __byte_perm (w1[3], w2[0], selector); - w2[1] = __byte_perm (w1[2], w1[3], selector); - 
w2[0] = __byte_perm (w1[1], w1[2], selector); - w1[3] = __byte_perm (w1[0], w1[1], selector); - w1[2] = __byte_perm (w0[3], w1[0], selector); - w1[1] = __byte_perm (w0[2], w0[3], selector); - w1[0] = __byte_perm (w0[1], w0[2], selector); - w0[3] = __byte_perm (w0[0], w0[1], selector); - w0[2] = __byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[1] = __byte_perm (w2[1], w2[2], selector); - w3[0] = __byte_perm (w2[0], w2[1], selector); - w2[3] = __byte_perm (w1[3], w2[0], selector); - w2[2] = __byte_perm (w1[2], w1[3], selector); - w2[1] = __byte_perm (w1[1], w1[2], selector); - w2[0] = __byte_perm (w1[0], w1[1], selector); - w1[3] = __byte_perm (w0[3], w1[0], selector); - w1[2] = __byte_perm (w0[2], w0[3], selector); - w1[1] = __byte_perm (w0[1], w0[2], selector); - w1[0] = __byte_perm (w0[0], w0[1], selector); - w0[3] = __byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[1] = __byte_perm (w2[0], w2[1], selector); - w3[0] = __byte_perm (w1[3], w2[0], selector); - w2[3] = __byte_perm (w1[2], w1[3], selector); - w2[2] = __byte_perm (w1[1], w1[2], selector); - w2[1] = __byte_perm (w1[0], w1[1], selector); - w2[0] = __byte_perm (w0[3], w1[0], selector); - w1[3] = __byte_perm (w0[2], w0[3], selector); - w1[2] = __byte_perm (w0[1], w0[2], selector); - w1[1] = __byte_perm (w0[0], w0[1], selector); - w1[0] = __byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[1] = __byte_perm (w1[3], w2[0], selector); - w3[0] = __byte_perm (w1[2], w1[3], selector); - w2[3] = __byte_perm (w1[1], w1[2], selector); - w2[2] = __byte_perm (w1[0], w1[1], selector); - w2[1] = __byte_perm (w0[3], w1[0], selector); - w2[0] = __byte_perm (w0[2], w0[3], selector); - w1[3] = __byte_perm (w0[1], w0[2], selector); - w1[2] = __byte_perm (w0[0], w0[1], selector); - w1[1] = __byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; 
- w0[0] = 0; - - break; - - case 6: - w3[1] = __byte_perm (w1[2], w1[3], selector); - w3[0] = __byte_perm (w1[1], w1[2], selector); - w2[3] = __byte_perm (w1[0], w1[1], selector); - w2[2] = __byte_perm (w0[3], w1[0], selector); - w2[1] = __byte_perm (w0[2], w0[3], selector); - w2[0] = __byte_perm (w0[1], w0[2], selector); - w1[3] = __byte_perm (w0[0], w0[1], selector); - w1[2] = __byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[1] = __byte_perm (w1[1], w1[2], selector); - w3[0] = __byte_perm (w1[0], w1[1], selector); - w2[3] = __byte_perm (w0[3], w1[0], selector); - w2[2] = __byte_perm (w0[2], w0[3], selector); - w2[1] = __byte_perm (w0[1], w0[2], selector); - w2[0] = __byte_perm (w0[0], w0[1], selector); - w1[3] = __byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[1] = __byte_perm (w1[0], w1[1], selector); - w3[0] = __byte_perm (w0[3], w1[0], selector); - w2[3] = __byte_perm (w0[2], w0[3], selector); - w2[2] = __byte_perm (w0[1], w0[2], selector); - w2[1] = __byte_perm (w0[0], w0[1], selector); - w2[0] = __byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[1] = __byte_perm (w0[3], w1[0], selector); - w3[0] = __byte_perm (w0[2], w0[3], selector); - w2[3] = __byte_perm (w0[1], w0[2], selector); - w2[2] = __byte_perm (w0[0], w0[1], selector); - w2[1] = __byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[1] = __byte_perm (w0[2], w0[3], selector); - w3[0] = __byte_perm (w0[1], w0[2], selector); - w2[3] = __byte_perm (w0[0], w0[1], selector); - w2[2] = __byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - 
w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[1] = __byte_perm (w0[1], w0[2], selector); - w3[0] = __byte_perm (w0[0], w0[1], selector); - w2[3] = __byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[1] = __byte_perm (w0[0], w0[1], selector); - w3[0] = __byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[1] = __byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - - #else - - u32x tmp0[4]; - u32x tmp1[4]; - u32x tmp2[1]; - - switch (offset % 4) - { - case 0: - tmp0[0] = w0[0]; - tmp0[1] = w0[1]; - tmp0[2] = w0[2]; - tmp0[3] = w0[3]; - tmp1[0] = w1[0]; - tmp1[1] = w1[1]; - tmp1[2] = w1[2]; - tmp1[3] = w1[3]; - tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 
| w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; - } - - switch (offset / 4) - { - case 0: - w0[0] = tmp0[0]; - w0[1] = tmp0[1]; - w0[2] = tmp0[2]; - w0[3] = tmp0[3]; - w1[0] = tmp1[0]; - w1[1] = tmp1[1]; - w1[2] = tmp1[2]; - w1[3] = tmp1[3]; - w2[0] = tmp2[0]; - break; - - case 1: - w0[0] = 0; - w0[1] = tmp0[0]; - w0[2] = tmp0[1]; - w0[3] = tmp0[2]; - w1[0] = tmp0[3]; - w1[1] = tmp1[0]; - w1[2] = tmp1[1]; - w1[3] = tmp1[2]; - w2[0] = tmp1[3]; - w2[1] = tmp2[0]; - break; - - case 2: - w0[0] = 0; - w0[1] = 0; - w0[2] = tmp0[0]; - w0[3] = tmp0[1]; - w1[0] = tmp0[2]; - w1[1] = tmp0[3]; - w1[2] = tmp1[0]; - w1[3] = tmp1[1]; - w2[0] = tmp1[2]; - w2[1] = tmp1[3]; - w2[2] = tmp2[0]; - break; - - case 3: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = tmp0[0]; - w1[0] = tmp0[1]; - w1[1] = tmp0[2]; - w1[2] = tmp0[3]; - w1[3] = tmp1[0]; - w2[0] = tmp1[1]; - w2[1] = tmp1[2]; - w2[2] = tmp1[3]; - w2[3] = tmp2[0]; - break; - - case 4: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = tmp0[0]; - w1[1] = tmp0[1]; - w1[2] = tmp0[2]; - w1[3] = tmp0[3]; - w2[0] = tmp1[0]; - w2[1] = tmp1[1]; - w2[2] = tmp1[2]; - w2[3] = tmp1[3]; - w3[0] = tmp2[0]; - break; - - case 5: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = tmp0[0]; - w1[2] = tmp0[1]; - w1[3] = tmp0[2]; - w2[0] = tmp0[3]; - w2[1] = tmp1[0]; - w2[2] = tmp1[1]; - w2[3] = tmp1[2]; - w3[0] = tmp1[3]; - w3[1] = tmp2[0]; - break; - - case 6: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = tmp0[0]; - w1[3] = tmp0[1]; - w2[0] = tmp0[2]; - w2[1] = tmp0[3]; - w2[2] = tmp1[0]; - w2[3] = tmp1[1]; - w3[0] = tmp1[2]; - w3[1] = tmp1[3]; - w3[2] = tmp2[0]; - break; - - case 7: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = tmp0[0]; - w2[0] = tmp0[1]; - w2[1] = 
tmp0[2]; - w2[2] = tmp0[3]; - w2[3] = tmp1[0]; - w3[0] = tmp1[1]; - w3[1] = tmp1[2]; - w3[2] = tmp1[3]; - w3[3] = tmp2[0]; - break; - - case 8: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = tmp0[0]; - w2[1] = tmp0[1]; - w2[2] = tmp0[2]; - w2[3] = tmp0[3]; - w3[0] = tmp1[0]; - w3[1] = tmp1[1]; - w3[2] = tmp1[2]; - w3[3] = tmp1[3]; - break; - - case 9: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = tmp0[0]; - w2[2] = tmp0[1]; - w2[3] = tmp0[2]; - w3[0] = tmp0[3]; - w3[1] = tmp1[0]; - w3[2] = tmp1[1]; - w3[3] = tmp1[2]; - break; - - case 10: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = tmp0[0]; - w2[3] = tmp0[1]; - w3[0] = tmp0[2]; - w3[1] = tmp0[3]; - w3[2] = tmp1[0]; - w3[3] = tmp1[1]; - break; - - case 11: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = tmp0[0]; - w3[0] = tmp0[1]; - w3[1] = tmp0[2]; - w3[2] = tmp0[3]; - w3[3] = tmp1[0]; - break; - - case 12: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = tmp0[0]; - w3[1] = tmp0[1]; - w3[2] = tmp0[2]; - w3[3] = tmp0[3]; - break; - - case 13: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = tmp0[0]; - w3[2] = tmp0[1]; - w3[3] = tmp0[2]; - break; - - } - - #endif -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_r_len = c_combs[0].pw_len; - - u32x wordr0[4]; - - wordr0[0] = c_combs[0].i[0]; - wordr0[1] = c_combs[0].i[1]; - wordr0[2] = c_combs[0].i[2]; - wordr0[3] = c_combs[0].i[3]; - - u32x wordr1[4]; - - wordr1[0] = c_combs[0].i[4]; - wordr1[1] = c_combs[0].i[5]; - wordr1[2] = c_combs[0].i[6]; - wordr1[3] = c_combs[0].i[7]; - - u32x wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32x wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, pw_r_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_len = pw_l_len + pw_r_len; - - pws_amp[gid].i[ 
0] = w0[0]; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/amp_a1_v4.cu b/nv/amp_a1_v4.cu deleted file mode 100644 index abd999a..0000000 --- a/nv/amp_a1_v4.cu +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (offset % 4); - - int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[0], w3[1], selector); - w3[0] = __byte_perm (w2[3], w3[0], selector); - w2[3] = __byte_perm (w2[2], w2[3], selector); - w2[2] = __byte_perm (w2[1], w2[2], selector); - w2[1] = __byte_perm (w2[0], w2[1], selector); - w2[0] = __byte_perm (w1[3], w2[0], selector); - w1[3] = __byte_perm (w1[2], w1[3], selector); - w1[2] = __byte_perm (w1[1], w1[2], selector); - w1[1] = __byte_perm (w1[0], w1[1], selector); - w1[0] = __byte_perm (w0[3], w1[0], selector); - w0[3] = __byte_perm (w0[2], w0[3], selector); - w0[2] = __byte_perm (w0[1], w0[2], selector); - w0[1] = __byte_perm (w0[0], w0[1], selector); - w0[0] = __byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[1] = __byte_perm (w2[3], w3[0], selector); - w3[0] = __byte_perm (w2[2], w2[3], selector); - w2[3] = __byte_perm (w2[1], w2[2], selector); - w2[2] = __byte_perm (w2[0], w2[1], selector); - w2[1] = __byte_perm 
(w1[3], w2[0], selector); - w2[0] = __byte_perm (w1[2], w1[3], selector); - w1[3] = __byte_perm (w1[1], w1[2], selector); - w1[2] = __byte_perm (w1[0], w1[1], selector); - w1[1] = __byte_perm (w0[3], w1[0], selector); - w1[0] = __byte_perm (w0[2], w0[3], selector); - w0[3] = __byte_perm (w0[1], w0[2], selector); - w0[2] = __byte_perm (w0[0], w0[1], selector); - w0[1] = __byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[1] = __byte_perm (w2[2], w2[3], selector); - w3[0] = __byte_perm (w2[1], w2[2], selector); - w2[3] = __byte_perm (w2[0], w2[1], selector); - w2[2] = __byte_perm (w1[3], w2[0], selector); - w2[1] = __byte_perm (w1[2], w1[3], selector); - w2[0] = __byte_perm (w1[1], w1[2], selector); - w1[3] = __byte_perm (w1[0], w1[1], selector); - w1[2] = __byte_perm (w0[3], w1[0], selector); - w1[1] = __byte_perm (w0[2], w0[3], selector); - w1[0] = __byte_perm (w0[1], w0[2], selector); - w0[3] = __byte_perm (w0[0], w0[1], selector); - w0[2] = __byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[1] = __byte_perm (w2[1], w2[2], selector); - w3[0] = __byte_perm (w2[0], w2[1], selector); - w2[3] = __byte_perm (w1[3], w2[0], selector); - w2[2] = __byte_perm (w1[2], w1[3], selector); - w2[1] = __byte_perm (w1[1], w1[2], selector); - w2[0] = __byte_perm (w1[0], w1[1], selector); - w1[3] = __byte_perm (w0[3], w1[0], selector); - w1[2] = __byte_perm (w0[2], w0[3], selector); - w1[1] = __byte_perm (w0[1], w0[2], selector); - w1[0] = __byte_perm (w0[0], w0[1], selector); - w0[3] = __byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[1] = __byte_perm (w2[0], w2[1], selector); - w3[0] = __byte_perm (w1[3], w2[0], selector); - w2[3] = __byte_perm (w1[2], w1[3], selector); - w2[2] = __byte_perm (w1[1], w1[2], selector); - w2[1] = __byte_perm (w1[0], w1[1], selector); - w2[0] = __byte_perm (w0[3], w1[0], selector); - w1[3] = __byte_perm (w0[2], w0[3], selector); - w1[2] = 
__byte_perm (w0[1], w0[2], selector); - w1[1] = __byte_perm (w0[0], w0[1], selector); - w1[0] = __byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[1] = __byte_perm (w1[3], w2[0], selector); - w3[0] = __byte_perm (w1[2], w1[3], selector); - w2[3] = __byte_perm (w1[1], w1[2], selector); - w2[2] = __byte_perm (w1[0], w1[1], selector); - w2[1] = __byte_perm (w0[3], w1[0], selector); - w2[0] = __byte_perm (w0[2], w0[3], selector); - w1[3] = __byte_perm (w0[1], w0[2], selector); - w1[2] = __byte_perm (w0[0], w0[1], selector); - w1[1] = __byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[1] = __byte_perm (w1[2], w1[3], selector); - w3[0] = __byte_perm (w1[1], w1[2], selector); - w2[3] = __byte_perm (w1[0], w1[1], selector); - w2[2] = __byte_perm (w0[3], w1[0], selector); - w2[1] = __byte_perm (w0[2], w0[3], selector); - w2[0] = __byte_perm (w0[1], w0[2], selector); - w1[3] = __byte_perm (w0[0], w0[1], selector); - w1[2] = __byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[1] = __byte_perm (w1[1], w1[2], selector); - w3[0] = __byte_perm (w1[0], w1[1], selector); - w2[3] = __byte_perm (w0[3], w1[0], selector); - w2[2] = __byte_perm (w0[2], w0[3], selector); - w2[1] = __byte_perm (w0[1], w0[2], selector); - w2[0] = __byte_perm (w0[0], w0[1], selector); - w1[3] = __byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[1] = __byte_perm (w1[0], w1[1], selector); - w3[0] = __byte_perm (w0[3], w1[0], selector); - w2[3] = __byte_perm (w0[2], w0[3], selector); - w2[2] = __byte_perm (w0[1], w0[2], selector); - w2[1] = __byte_perm (w0[0], w0[1], selector); - w2[0] = __byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 
0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[1] = __byte_perm (w0[3], w1[0], selector); - w3[0] = __byte_perm (w0[2], w0[3], selector); - w2[3] = __byte_perm (w0[1], w0[2], selector); - w2[2] = __byte_perm (w0[0], w0[1], selector); - w2[1] = __byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[1] = __byte_perm (w0[2], w0[3], selector); - w3[0] = __byte_perm (w0[1], w0[2], selector); - w2[3] = __byte_perm (w0[0], w0[1], selector); - w2[2] = __byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[1] = __byte_perm (w0[1], w0[2], selector); - w3[0] = __byte_perm (w0[0], w0[1], selector); - w2[3] = __byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[1] = __byte_perm (w0[0], w0[1], selector); - w3[0] = __byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[1] = __byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - - #else - - u32x tmp0[4]; - u32x tmp1[4]; - u32x tmp2[1]; - - switch (offset % 4) - { - case 0: - tmp0[0] = w0[0]; - tmp0[1] = w0[1]; - tmp0[2] = w0[2]; - tmp0[3] = w0[3]; - tmp1[0] = w1[0]; - tmp1[1] = w1[1]; - tmp1[2] = w1[2]; - tmp1[3] = w1[3]; - tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] 
>> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; - } - - switch (offset / 4) - { - case 0: - w0[0] = tmp0[0]; - w0[1] = tmp0[1]; - w0[2] = tmp0[2]; - w0[3] = tmp0[3]; - w1[0] = tmp1[0]; - w1[1] = tmp1[1]; - w1[2] = tmp1[2]; - w1[3] = tmp1[3]; - w2[0] = tmp2[0]; - break; - - case 1: - w0[0] = 0; - w0[1] = tmp0[0]; - w0[2] = tmp0[1]; - w0[3] = tmp0[2]; - w1[0] = tmp0[3]; - w1[1] = tmp1[0]; - w1[2] = tmp1[1]; - w1[3] = tmp1[2]; - w2[0] = tmp1[3]; - w2[1] = tmp2[0]; - break; - - case 2: - w0[0] = 0; - w0[1] = 0; - w0[2] = tmp0[0]; - w0[3] = tmp0[1]; - w1[0] = tmp0[2]; - w1[1] = tmp0[3]; - w1[2] = tmp1[0]; - w1[3] = tmp1[1]; - w2[0] = tmp1[2]; - w2[1] = tmp1[3]; - w2[2] = tmp2[0]; - break; - - case 3: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = tmp0[0]; - w1[0] = tmp0[1]; - w1[1] = tmp0[2]; - w1[2] = tmp0[3]; - w1[3] = tmp1[0]; - w2[0] = tmp1[1]; - w2[1] = tmp1[2]; - w2[2] = tmp1[3]; - w2[3] = tmp2[0]; - break; - - case 4: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = tmp0[0]; - w1[1] = tmp0[1]; - w1[2] = tmp0[2]; - w1[3] = tmp0[3]; - w2[0] = tmp1[0]; - w2[1] = tmp1[1]; - w2[2] = tmp1[2]; - w2[3] = tmp1[3]; - w3[0] = tmp2[0]; - 
break; - - case 5: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = tmp0[0]; - w1[2] = tmp0[1]; - w1[3] = tmp0[2]; - w2[0] = tmp0[3]; - w2[1] = tmp1[0]; - w2[2] = tmp1[1]; - w2[3] = tmp1[2]; - w3[0] = tmp1[3]; - w3[1] = tmp2[0]; - break; - - case 6: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = tmp0[0]; - w1[3] = tmp0[1]; - w2[0] = tmp0[2]; - w2[1] = tmp0[3]; - w2[2] = tmp1[0]; - w2[3] = tmp1[1]; - w3[0] = tmp1[2]; - w3[1] = tmp1[3]; - w3[2] = tmp2[0]; - break; - - case 7: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = tmp0[0]; - w2[0] = tmp0[1]; - w2[1] = tmp0[2]; - w2[2] = tmp0[3]; - w2[3] = tmp1[0]; - w3[0] = tmp1[1]; - w3[1] = tmp1[2]; - w3[2] = tmp1[3]; - w3[3] = tmp2[0]; - break; - - case 8: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = tmp0[0]; - w2[1] = tmp0[1]; - w2[2] = tmp0[2]; - w2[3] = tmp0[3]; - w3[0] = tmp1[0]; - w3[1] = tmp1[1]; - w3[2] = tmp1[2]; - w3[3] = tmp1[3]; - break; - - case 9: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = tmp0[0]; - w2[2] = tmp0[1]; - w2[3] = tmp0[2]; - w3[0] = tmp0[3]; - w3[1] = tmp1[0]; - w3[2] = tmp1[1]; - w3[3] = tmp1[2]; - break; - - case 10: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = tmp0[0]; - w2[3] = tmp0[1]; - w3[0] = tmp0[2]; - w3[1] = tmp0[3]; - w3[2] = tmp1[0]; - w3[3] = tmp1[1]; - break; - - case 11: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = tmp0[0]; - w3[0] = tmp0[1]; - w3[1] = tmp0[2]; - w3[2] = tmp0[3]; - w3[3] = tmp1[0]; - break; - - case 12: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - 
w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = tmp0[0]; - w3[1] = tmp0[1]; - w3[2] = tmp0[2]; - w3[3] = tmp0[3]; - break; - - case 13: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = tmp0[0]; - w3[2] = tmp0[1]; - w3[3] = tmp0[2]; - break; - - } - - #endif -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_r_len = c_combs[0].pw_len; - - u32x wordr0[4]; - - wordr0[0] = c_combs[0].i[0]; - wordr0[1] = c_combs[0].i[1]; - wordr0[2] = c_combs[0].i[2]; - wordr0[3] = c_combs[0].i[3]; - - u32x wordr1[4]; - - wordr1[0] = c_combs[0].i[4]; - wordr1[1] = c_combs[0].i[5]; - wordr1[2] = c_combs[0].i[6]; - wordr1[3] = c_combs[0].i[7]; - - u32x wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32x wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, 
wordl3, pw_r_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_len = pw_l_len + pw_r_len; - - pws_amp[gid].i[ 0] = w0[0]; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/amp_a3_v1.cu b/nv/amp_a3_v1.cu deleted file mode 100644 index 9fd5ebf..0000000 --- a/nv/amp_a3_v1.cu +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE1 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ __constant__ bf_t c_bfs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 
3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 w0r = c_bfs[0].i; - - pws_amp[gid].i[ 0] = w0[0] | w0r; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/amp_a3_v2.cu b/nv/amp_a3_v2.cu deleted file mode 100644 index f7e1881..0000000 --- a/nv/amp_a3_v2.cu +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE2 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ __constant__ bf_t c_bfs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = 
pws[gid].i[15]; - - const u32 w0r = c_bfs[0].i; - - pws_amp[gid].i[ 0] = w0[0] | w0r; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/amp_a3_v4.cu b/nv/amp_a3_v4.cu deleted file mode 100644 index 082bbc7..0000000 --- a/nv/amp_a3_v4.cu +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define VECT_SIZE4 - -#include "include/constants.h" -#include "types_nv.c" - -__device__ __constant__ bf_t c_bfs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) amp (pw_t *pws, pw_t *pws_amp, gpu_rule_t *rules_buf, comb_t *combs_buf, bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 w0r = c_bfs[0].i; - - pws_amp[gid].i[ 0] = w0[0] | w0r; - pws_amp[gid].i[ 1] = w0[1]; - pws_amp[gid].i[ 2] = w0[2]; - pws_amp[gid].i[ 3] = w0[3]; - pws_amp[gid].i[ 4] = w1[0]; - pws_amp[gid].i[ 5] = w1[1]; - pws_amp[gid].i[ 6] = w1[2]; - pws_amp[gid].i[ 7] = w1[3]; - 
pws_amp[gid].i[ 8] = w2[0]; - pws_amp[gid].i[ 9] = w2[1]; - pws_amp[gid].i[10] = w2[2]; - pws_amp[gid].i[11] = w2[3]; - pws_amp[gid].i[12] = w3[0]; - pws_amp[gid].i[13] = w3[1]; - pws_amp[gid].i[14] = w3[2]; - pws_amp[gid].i[15] = w3[3]; - - pws_amp[gid].pw_len = pw_len; -} diff --git a/nv/check_multi_vect1_comp4_warp.c b/nv/check_multi_vect1_comp4_warp.c deleted file mode 100644 index 500d37d..0000000 --- a/nv/check_multi_vect1_comp4_warp.c +++ /dev/null @@ -1,34 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0; -digest_tp[1] = r1; -digest_tp[2] = r2; -digest_tp[3] = r3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_multi_vect1_comp4_warp_bs.c b/nv/check_multi_vect1_comp4_warp_bs.c deleted file mode 100644 index 031227e..0000000 --- a/nv/check_multi_vect1_comp4_warp_bs.c +++ /dev/null @@ -1,34 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0; -digest_tp[1] = r1; -digest_tp[2] = r2; -digest_tp[3] = r3; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) 
== 0) && (check_vector_accessible (il_pos + slice, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos + slice); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_multi_vect2_comp4.c b/nv/check_multi_vect2_comp4.c deleted file mode 100644 index c1a7f65..0000000 --- a/nv/check_multi_vect2_comp4.c +++ /dev/null @@ -1,67 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.x; -digest_tp[1] = r1.x; -digest_tp[2] = r2.x; -digest_tp[3] = r3.x; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.y; -digest_tp[1] = r1.y; -digest_tp[2] = r2.y; -digest_tp[3] = r3.y; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_multi_vect2_comp4_warp.c b/nv/check_multi_vect2_comp4_warp.c deleted file mode 100644 index 749790c..0000000 --- a/nv/check_multi_vect2_comp4_warp.c +++ /dev/null @@ -1,67 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.x; 
-digest_tp[1] = r1.x; -digest_tp[2] = r2.x; -digest_tp[3] = r3.x; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.y; -digest_tp[1] = r1.y; -digest_tp[2] = r2.y; -digest_tp[3] = r3.y; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_multi_vect4_comp4.c b/nv/check_multi_vect4_comp4.c deleted file mode 100644 index 1e83553..0000000 --- a/nv/check_multi_vect4_comp4.c +++ /dev/null @@ -1,133 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.x; -digest_tp[1] = r1.x; -digest_tp[2] = r2.x; -digest_tp[3] = r3.x; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int 
hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.y; -digest_tp[1] = r1.y; -digest_tp[2] = r2.y; -digest_tp[3] = r3.y; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.z; -digest_tp[1] = r1.z; -digest_tp[2] = r2.z; -digest_tp[3] = r3.z; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s2 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.w; -digest_tp[1] = r1.w; -digest_tp[2] = r2.w; -digest_tp[3] = r3.w; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - 
int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s3 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_multi_vect4_comp4_warp.c b/nv/check_multi_vect4_comp4_warp.c deleted file mode 100644 index 5710ba4..0000000 --- a/nv/check_multi_vect4_comp4_warp.c +++ /dev/null @@ -1,133 +0,0 @@ -u32 digest_tp[4]; - -digest_tp[0] = r0.x; -digest_tp[1] = r1.x; -digest_tp[2] = r2.x; -digest_tp[3] = r3.x; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.y; -digest_tp[1] = r1.y; -digest_tp[2] = r2.y; -digest_tp[3] = r3.y; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, 
il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.z; -digest_tp[1] = r1.z; -digest_tp[2] = r2.z; -digest_tp[3] = r3.z; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 2) == 1)) - { - mark_hash_s2_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} - -digest_tp[0] = r0.w; -digest_tp[1] = r1.w; -digest_tp[2] = r2.w; -digest_tp[3] = r3.w; - -if (check (digest_tp, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - bitmap_mask, - bitmap_shift1, - bitmap_shift2)) -{ - int hash_pos = find_hash (digest_tp, digests_cnt, &digests_buf[digests_offset]); - - if (hash_pos != -1) - { - const u32 final_hash_pos = digests_offset + hash_pos; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 3) == 1)) - { - mark_hash_s3_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/check_single_vect1_comp4.c b/nv/check_single_vect1_comp4.c deleted file mode 100644 index c49d06e..0000000 --- a/nv/check_single_vect1_comp4.c +++ /dev/null @@ -1,14 +0,0 @@ -if ((r0 == search[0]) - && (r1 == search[1]) - && (r2 == search[2]) - && (r3 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - 
- d_return_buf[lid] = 1; - } -} diff --git a/nv/check_single_vect1_comp4_warp.c b/nv/check_single_vect1_comp4_warp.c deleted file mode 100644 index ee990a6..0000000 --- a/nv/check_single_vect1_comp4_warp.c +++ /dev/null @@ -1,14 +0,0 @@ -if ((r0 == search[0]) - && (r1 == search[1]) - && (r2 == search[2]) - && (r3 == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/check_single_vect1_comp4_warp_bs.c b/nv/check_single_vect1_comp4_warp_bs.c deleted file mode 100644 index 3d5729f..0000000 --- a/nv/check_single_vect1_comp4_warp_bs.c +++ /dev/null @@ -1,3 +0,0 @@ -mark_hash_s0_warp (plains_buf, hashes_shown, 0, gid, il_pos + slice); - -d_return_buf[lid] = 1; diff --git a/nv/check_single_vect2_comp4.c b/nv/check_single_vect2_comp4.c deleted file mode 100644 index f7a7987..0000000 --- a/nv/check_single_vect2_comp4.c +++ /dev/null @@ -1,29 +0,0 @@ -if ((r0.x == search[0]) - && (r1.x == search[1]) - && (r2.x == search[2]) - && (r3.x == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.y == search[0]) - && (r1.y == search[1]) - && (r2.y == search[2]) - && (r3.y == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/check_single_vect2_comp4_warp.c b/nv/check_single_vect2_comp4_warp.c deleted file mode 100644 index fc261c5..0000000 --- a/nv/check_single_vect2_comp4_warp.c +++ /dev/null @@ -1,29 +0,0 @@ -if ((r0.x == search[0]) 
- && (r1.x == search[1]) - && (r2.x == search[2]) - && (r3.x == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.y == search[0]) - && (r1.y == search[1]) - && (r2.y == search[2]) - && (r3.y == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/check_single_vect4_comp4.c b/nv/check_single_vect4_comp4.c deleted file mode 100644 index cde5eb7..0000000 --- a/nv/check_single_vect4_comp4.c +++ /dev/null @@ -1,59 +0,0 @@ -if ((r0.x == search[0]) - && (r1.x == search[1]) - && (r2.x == search[2]) - && (r3.x == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s0 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.y == search[0]) - && (r1.y == search[1]) - && (r2.y == search[2]) - && (r3.y == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s1 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.z == search[0]) - && (r1.z == search[1]) - && (r2.z == search[2]) - && (r3.z == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s2 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.w == search[0]) - && (r1.w == search[1]) - && (r2.w == 
search[2]) - && (r3.w == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if (atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) - { - mark_hash_s3 (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/check_single_vect4_comp4_warp.c b/nv/check_single_vect4_comp4_warp.c deleted file mode 100644 index c541794..0000000 --- a/nv/check_single_vect4_comp4_warp.c +++ /dev/null @@ -1,59 +0,0 @@ -if ((r0.x == search[0]) - && (r1.x == search[1]) - && (r2.x == search[2]) - && (r3.x == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 0) == 1)) - { - mark_hash_s0_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.y == search[0]) - && (r1.y == search[1]) - && (r2.y == search[2]) - && (r3.y == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 1) == 1)) - { - mark_hash_s1_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.z == search[0]) - && (r1.z == search[1]) - && (r2.z == search[2]) - && (r3.z == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 2) == 1)) - { - mark_hash_s2_warp (plains_buf, hashes_shown, final_hash_pos, gid, il_pos); - - d_return_buf[lid] = 1; - } -} - -if ((r0.w == search[0]) - && (r1.w == search[1]) - && (r2.w == search[2]) - && (r3.w == search[3])) -{ - const u32 final_hash_pos = digests_offset + 0; - - if ((atomicAdd (&hashes_shown[final_hash_pos], 1) == 0) && (check_vector_accessible (il_pos, bf_loops, bfs_cnt, 3) == 1)) - { - mark_hash_s3_warp (plains_buf, hashes_shown, final_hash_pos, 
gid, il_pos); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/common_nv.c b/nv/common_nv.c deleted file mode 100644 index 86fe19a..0000000 --- a/nv/common_nv.c +++ /dev/null @@ -1,15025 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -__device__ static int device_memcmp (const u32 d1[4], const u32 *d2) -{ - if (d1[3] > d2[DGST_R3]) return ( 1); - if (d1[3] < d2[DGST_R3]) return (-1); - if (d1[2] > d2[DGST_R2]) return ( 1); - if (d1[2] < d2[DGST_R2]) return (-1); - if (d1[1] > d2[DGST_R1]) return ( 1); - if (d1[1] < d2[DGST_R1]) return (-1); - if (d1[0] > d2[DGST_R0]) return ( 1); - if (d1[0] < d2[DGST_R0]) return (-1); - - return (0); -} - -__device__ static int find_hash (const u32 digest[4], const u32 digests_cnt, const digest_t *digests_buf) -{ - for (u32 l = 0, r = digests_cnt; r; r >>= 1) - { - const u32 m = r >> 1; - - const u32 c = l + m; - - const int cmp = device_memcmp (digest, digests_buf[c].digest_buf); - - if (cmp > 0) - { - l += m + 1; - - r--; - } - - if (cmp == 0) return (c); - } - - return (-1); -} - -__device__ static u32 check_bitmap (const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest) -{ - return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f))); -} - -__device__ static u32 check (const u32 digest[2], const u32 *bitmap_s1_a, const u32 *bitmap_s1_b, const u32 *bitmap_s1_c, const u32 *bitmap_s1_d, const u32 *bitmap_s2_a, const u32 *bitmap_s2_b, const u32 *bitmap_s2_c, const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2) -{ - if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0); - if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0); - if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0); - if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0); - - if (check_bitmap (bitmap_s2_a, bitmap_mask, 
bitmap_shift2, digest[0]) == 0) return (0); - if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0); - if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0); - if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0); - - return (1); -} - -#ifdef VECT_SIZE1 -__device__ static void mark_hash_s0 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 1) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s0_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 1) + 0; -} -#endif - -#ifdef VECT_SIZE2 -__device__ static void mark_hash_s0 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 2) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s1 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 2) + 1; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s0_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 2) + 0; -} - -__device__ static void mark_hash_s1_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 2) + 1; -} -#endif - -#ifdef VECT_SIZE4 -__device__ static void 
mark_hash_s0 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 0; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s1 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 1; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s2 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 2; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s3 (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = (gid * 4) + 3; - plains_buf[hash_pos].il_pos = il_pos; -} - -__device__ static void mark_hash_s0_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 0; -} - -__device__ static void mark_hash_s1_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 1; -} - -__device__ static void mark_hash_s2_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 2; -} - -__device__ static void mark_hash_s3_warp (plain_t *plains_buf, u32 *hashes_shown, const int hash_pos, const u32 gid, const u32 il_pos) -{ - hashes_shown[hash_pos] = 1; - - plains_buf[hash_pos].gidvid = 
gid; - plains_buf[hash_pos].il_pos = (il_pos * 4) + 3; -} -#endif - -/** - * scalar - */ - -__device__ static u32 swap_workaround (const u32 v) -{ - #if __CUDA_ARCH__ >= 200 - return __byte_perm (v, 0, 0x0123); - - #else - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); - - #endif -} - -__device__ static u64 swap_workaround (const u64 v) -{ - return (((v & 0xff00000000000000ull) >> 56) - | ((v & 0x00ff000000000000ull) >> 40) - | ((v & 0x0000ff0000000000ull) >> 24) - | ((v & 0x000000ff00000000ull) >> 8) - | ((v & 0x00000000ff000000ull) << 8) - | ((v & 0x0000000000ff0000ull) << 24) - | ((v & 0x000000000000ff00ull) << 40) - | ((v & 0x00000000000000ffull) << 56)); -} - -__device__ static void truncate_block (u32 w[4], const u32 len) -{ - switch (len) - { - case 0: w[0] &= 0; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 1: w[0] &= 0x000000FF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 2: w[0] &= 0x0000FFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 3: w[0] &= 0x00FFFFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 4: w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 5: w[1] &= 0x000000FF; - w[2] &= 0; - w[3] &= 0; - break; - case 6: w[1] &= 0x0000FFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 7: w[1] &= 0x00FFFFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 8: w[2] &= 0; - w[3] &= 0; - break; - case 9: w[2] &= 0x000000FF; - w[3] &= 0; - break; - case 10: w[2] &= 0x0000FFFF; - w[3] &= 0; - break; - case 11: w[2] &= 0x00FFFFFF; - w[3] &= 0; - break; - case 12: w[3] &= 0; - break; - case 13: w[3] &= 0x000000FF; - break; - case 14: w[3] &= 0x0000FFFF; - break; - case 15: w[3] &= 0x00FFFFFF; - break; - } -} - -__device__ static void make_unicode (const u32 in[4], u32 out1[4], u32 out2[4]) -{ - #if __CUDA_ARCH__ >= 200 - out2[3] = __byte_perm (in[3], 0, 0x7372); - out2[2] = __byte_perm (in[3], 0, 0x7170); - out2[1] = __byte_perm (in[2], 0, 0x7372); - out2[0] = __byte_perm (in[2], 0, 0x7170); - 
out1[3] = __byte_perm (in[1], 0, 0x7372); - out1[2] = __byte_perm (in[1], 0, 0x7170); - out1[1] = __byte_perm (in[0], 0, 0x7372); - out1[0] = __byte_perm (in[0], 0, 0x7170); - #else - out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF); - out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF); - out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF); - out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF); - out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF); - out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF); - out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF); - out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF); - #endif -} - -__device__ static void undo_unicode (const u32 in1[4], const u32 in2[4], u32 out[4]) -{ - #if __CUDA_ARCH__ >= 200 - out[0] = __byte_perm (in1[0], in1[1], 0x6420); - out[1] = __byte_perm (in1[2], in1[3], 0x6420); - out[2] = __byte_perm (in2[0], in2[1], 0x6420); - out[3] = __byte_perm (in2[2], in2[3], 0x6420); - #else - out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8) - | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8); - out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8) - | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8); - out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8) - | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8); - out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8) - | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8); - #endif -} - -__device__ static void append_0x01_1 (u32 w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; 
- break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: 
- w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] 
= w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - 
w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 
5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 
0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - - case 64: - w4[0] = 0x01; - break; - - case 65: - w4[0] = w4[0] | 0x0100; - break; - - case 66: - w4[0] = w4[0] | 0x010000; - break; - - case 67: - w4[0] = w4[0] | 0x01000000; - break; - - case 68: - w4[1] = 0x01; - break; - - case 69: - w4[1] = w4[1] | 0x0100; - break; - - case 70: - w4[1] = w4[1] | 0x010000; - break; - - case 71: - w4[1] = w4[1] | 0x01000000; - break; - - case 72: - w4[2] = 0x01; - break; - - case 73: - w4[2] = w4[2] | 0x0100; - break; - - case 74: - w4[2] = w4[2] | 0x010000; - break; - - case 75: - w4[2] = w4[2] | 0x01000000; - break; - - case 76: - w4[3] = 0x01; - break; - - case 77: - w4[3] = w4[3] | 0x0100; - break; - - case 78: - w4[3] = w4[3] | 0x010000; - break; - - case 79: - w4[3] = w4[3] | 0x01000000; - break; - - case 80: - w5[0] = 0x01; - break; - - case 81: - w5[0] = w5[0] | 0x0100; - break; - - case 82: - w5[0] = w5[0] | 0x010000; - break; - - case 83: - w5[0] = w5[0] | 0x01000000; - break; - - case 84: - w5[1] = 0x01; - break; - - case 85: - w5[1] = w5[1] | 0x0100; - break; - - case 86: - w5[1] = w5[1] | 0x010000; - break; - - case 87: - w5[1] = w5[1] | 0x01000000; - break; - - case 88: - w5[2] = 0x01; - break; - - case 89: - w5[2] = w5[2] | 0x0100; - break; - - case 90: 
- w5[2] = w5[2] | 0x010000; - break; - - case 91: - w5[2] = w5[2] | 0x01000000; - break; - - case 92: - w5[3] = 0x01; - break; - - case 93: - w5[3] = w5[3] | 0x0100; - break; - - case 94: - w5[3] = w5[3] | 0x010000; - break; - - case 95: - w5[3] = w5[3] | 0x01000000; - break; - - case 96: - w6[0] = 0x01; - break; - - case 97: - w6[0] = w6[0] | 0x0100; - break; - - case 98: - w6[0] = w6[0] | 0x010000; - break; - - case 99: - w6[0] = w6[0] | 0x01000000; - break; - - case 100: - w6[1] = 0x01; - break; - - case 101: - w6[1] = w6[1] | 0x0100; - break; - - case 102: - w6[1] = w6[1] | 0x010000; - break; - - case 103: - w6[1] = w6[1] | 0x01000000; - break; - - case 104: - w6[2] = 0x01; - break; - - case 105: - w6[2] = w6[2] | 0x0100; - break; - - case 106: - w6[2] = w6[2] | 0x010000; - break; - - case 107: - w6[2] = w6[2] | 0x01000000; - break; - - case 108: - w6[3] = 0x01; - break; - - case 109: - w6[3] = w6[3] | 0x0100; - break; - - case 110: - w6[3] = w6[3] | 0x010000; - break; - - case 111: - w6[3] = w6[3] | 0x01000000; - break; - - case 112: - w7[0] = 0x01; - break; - - case 113: - w7[0] = w7[0] | 0x0100; - break; - - case 114: - w7[0] = w7[0] | 0x010000; - break; - - case 115: - w7[0] = w7[0] | 0x01000000; - break; - - case 116: - w7[1] = 0x01; - break; - - case 117: - w7[1] = w7[1] | 0x0100; - break; - - case 118: - w7[1] = w7[1] | 0x010000; - break; - - case 119: - w7[1] = w7[1] | 0x01000000; - break; - - case 120: - w7[2] = 0x01; - break; - - case 121: - w7[2] = w7[2] | 0x0100; - break; - - case 122: - w7[2] = w7[2] | 0x010000; - break; - - case 123: - w7[2] = w7[2] | 0x01000000; - break; - - case 124: - w7[3] = 0x01; - break; - - case 125: - w7[3] = w7[3] | 0x0100; - break; - - case 126: - w7[3] = w7[3] | 0x010000; - break; - - case 127: - w7[3] = w7[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x02_1 (u32 w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - 
case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 
0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 
0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = 
w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - 
w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - 
break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - - case 64: - w4[0] = 0x02; - break; - - case 65: - w4[0] = w4[0] | 0x0200; - break; - - case 66: - w4[0] = w4[0] | 0x020000; - break; - - case 67: - w4[0] = w4[0] | 0x02000000; - break; - - case 68: - w4[1] = 0x02; - break; - - case 69: - w4[1] = w4[1] | 0x0200; - break; - - case 70: - w4[1] = w4[1] | 0x020000; - break; - - case 71: - w4[1] = w4[1] | 0x02000000; - break; - - case 72: - w4[2] = 0x02; - break; - - case 73: - w4[2] = w4[2] | 0x0200; - break; - - case 74: - w4[2] = w4[2] | 0x020000; - break; - - case 75: - w4[2] = w4[2] | 0x02000000; - break; - - case 76: - w4[3] = 0x02; - break; - - case 77: - w4[3] = w4[3] | 0x0200; - break; - - case 78: - w4[3] = w4[3] | 0x020000; - break; - - case 79: - w4[3] = w4[3] | 0x02000000; - break; - - case 80: - w5[0] = 0x02; - break; - - case 81: - w5[0] = w5[0] | 0x0200; - break; - - case 82: - w5[0] = w5[0] | 0x020000; - break; - - case 83: - w5[0] = w5[0] | 0x02000000; - break; - - case 84: - w5[1] = 0x02; - break; - - case 85: - w5[1] = w5[1] | 0x0200; - break; - - case 86: - w5[1] = 
w5[1] | 0x020000; - break; - - case 87: - w5[1] = w5[1] | 0x02000000; - break; - - case 88: - w5[2] = 0x02; - break; - - case 89: - w5[2] = w5[2] | 0x0200; - break; - - case 90: - w5[2] = w5[2] | 0x020000; - break; - - case 91: - w5[2] = w5[2] | 0x02000000; - break; - - case 92: - w5[3] = 0x02; - break; - - case 93: - w5[3] = w5[3] | 0x0200; - break; - - case 94: - w5[3] = w5[3] | 0x020000; - break; - - case 95: - w5[3] = w5[3] | 0x02000000; - break; - - case 96: - w6[0] = 0x02; - break; - - case 97: - w6[0] = w6[0] | 0x0200; - break; - - case 98: - w6[0] = w6[0] | 0x020000; - break; - - case 99: - w6[0] = w6[0] | 0x02000000; - break; - - case 100: - w6[1] = 0x02; - break; - - case 101: - w6[1] = w6[1] | 0x0200; - break; - - case 102: - w6[1] = w6[1] | 0x020000; - break; - - case 103: - w6[1] = w6[1] | 0x02000000; - break; - - case 104: - w6[2] = 0x02; - break; - - case 105: - w6[2] = w6[2] | 0x0200; - break; - - case 106: - w6[2] = w6[2] | 0x020000; - break; - - case 107: - w6[2] = w6[2] | 0x02000000; - break; - - case 108: - w6[3] = 0x02; - break; - - case 109: - w6[3] = w6[3] | 0x0200; - break; - - case 110: - w6[3] = w6[3] | 0x020000; - break; - - case 111: - w6[3] = w6[3] | 0x02000000; - break; - - case 112: - w7[0] = 0x02; - break; - - case 113: - w7[0] = w7[0] | 0x0200; - break; - - case 114: - w7[0] = w7[0] | 0x020000; - break; - - case 115: - w7[0] = w7[0] | 0x02000000; - break; - - case 116: - w7[1] = 0x02; - break; - - case 117: - w7[1] = w7[1] | 0x0200; - break; - - case 118: - w7[1] = w7[1] | 0x020000; - break; - - case 119: - w7[1] = w7[1] | 0x02000000; - break; - - case 120: - w7[2] = 0x02; - break; - - case 121: - w7[2] = w7[2] | 0x0200; - break; - - case 122: - w7[2] = w7[2] | 0x020000; - break; - - case 123: - w7[2] = w7[2] | 0x02000000; - break; - - case 124: - w7[3] = 0x02; - break; - - case 125: - w7[3] = w7[3] | 0x0200; - break; - - case 126: - w7[3] = w7[3] | 0x020000; - break; - - case 127: - w7[3] = w7[3] | 0x02000000; - break; - } -} - 
-__device__ static void append_0x80_1 (u32 w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_2 (u32 w0[4], u32 w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 
0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_3 (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 
0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_4 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = 
w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } -} - -__device__ static void 
append_0x80_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 
0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - - case 64: - w4[0] = 0x80; - break; - - case 65: - w4[0] = w4[0] | 0x8000; - break; - - case 66: - w4[0] = w4[0] | 0x800000; - break; - - case 67: - w4[0] = w4[0] | 0x80000000; - break; - - case 68: - w4[1] = 0x80; - break; - - case 69: - w4[1] = w4[1] | 0x8000; - break; - - case 70: - w4[1] = w4[1] | 0x800000; - break; - - case 71: - w4[1] = w4[1] | 0x80000000; - break; - - case 72: - w4[2] = 0x80; - break; - - case 73: - w4[2] = w4[2] | 0x8000; - break; - - case 74: - w4[2] = w4[2] | 0x800000; - break; - - case 75: - w4[2] = w4[2] | 0x80000000; - break; - - case 76: - w4[3] = 0x80; - break; - - case 77: - w4[3] = w4[3] | 0x8000; - break; - - case 78: - w4[3] = w4[3] | 0x800000; - break; - - case 79: - w4[3] = w4[3] | 0x80000000; - break; - - case 80: - w5[0] = 0x80; - break; - - case 81: - w5[0] = w5[0] | 0x8000; - break; - - case 82: 
- w5[0] = w5[0] | 0x800000; - break; - - case 83: - w5[0] = w5[0] | 0x80000000; - break; - - case 84: - w5[1] = 0x80; - break; - - case 85: - w5[1] = w5[1] | 0x8000; - break; - - case 86: - w5[1] = w5[1] | 0x800000; - break; - - case 87: - w5[1] = w5[1] | 0x80000000; - break; - - case 88: - w5[2] = 0x80; - break; - - case 89: - w5[2] = w5[2] | 0x8000; - break; - - case 90: - w5[2] = w5[2] | 0x800000; - break; - - case 91: - w5[2] = w5[2] | 0x80000000; - break; - - case 92: - w5[3] = 0x80; - break; - - case 93: - w5[3] = w5[3] | 0x8000; - break; - - case 94: - w5[3] = w5[3] | 0x800000; - break; - - case 95: - w5[3] = w5[3] | 0x80000000; - break; - - case 96: - w6[0] = 0x80; - break; - - case 97: - w6[0] = w6[0] | 0x8000; - break; - - case 98: - w6[0] = w6[0] | 0x800000; - break; - - case 99: - w6[0] = w6[0] | 0x80000000; - break; - - case 100: - w6[1] = 0x80; - break; - - case 101: - w6[1] = w6[1] | 0x8000; - break; - - case 102: - w6[1] = w6[1] | 0x800000; - break; - - case 103: - w6[1] = w6[1] | 0x80000000; - break; - - case 104: - w6[2] = 0x80; - break; - - case 105: - w6[2] = w6[2] | 0x8000; - break; - - case 106: - w6[2] = w6[2] | 0x800000; - break; - - case 107: - w6[2] = w6[2] | 0x80000000; - break; - - case 108: - w6[3] = 0x80; - break; - - case 109: - w6[3] = w6[3] | 0x8000; - break; - - case 110: - w6[3] = w6[3] | 0x800000; - break; - - case 111: - w6[3] = w6[3] | 0x80000000; - break; - - case 112: - w7[0] = 0x80; - break; - - case 113: - w7[0] = w7[0] | 0x8000; - break; - - case 114: - w7[0] = w7[0] | 0x800000; - break; - - case 115: - w7[0] = w7[0] | 0x80000000; - break; - - case 116: - w7[1] = 0x80; - break; - - case 117: - w7[1] = w7[1] | 0x8000; - break; - - case 118: - w7[1] = w7[1] | 0x800000; - break; - - case 119: - w7[1] = w7[1] | 0x80000000; - break; - - case 120: - w7[2] = 0x80; - break; - - case 121: - w7[2] = w7[2] | 0x8000; - break; - - case 122: - w7[2] = w7[2] | 0x800000; - break; - - case 123: - w7[2] = w7[2] | 0x80000000; - break; - - 
case 124: - w7[3] = 0x80; - break; - - case 125: - w7[3] = w7[3] | 0x8000; - break; - - case 126: - w7[3] = w7[3] | 0x800000; - break; - - case 127: - w7[3] = w7[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_4 (u32 w[16], const u32 offset) -{ - switch (offset) - { - case 0: - w[ 0] = 0x80; - break; - - case 1: - w[ 0] = w[ 0] | 0x8000; - break; - - case 2: - w[ 0] = w[ 0] | 0x800000; - break; - - case 3: - w[ 0] = w[ 0] | 0x80000000; - break; - - case 4: - w[ 1] = 0x80; - break; - - case 5: - w[ 1] = w[ 1] | 0x8000; - break; - - case 6: - w[ 1] = w[ 1] | 0x800000; - break; - - case 7: - w[ 1] = w[ 1] | 0x80000000; - break; - - case 8: - w[ 2] = 0x80; - break; - - case 9: - w[ 2] = w[ 2] | 0x8000; - break; - - case 10: - w[ 2] = w[ 2] | 0x800000; - break; - - case 11: - w[ 2] = w[ 2] | 0x80000000; - break; - - case 12: - w[ 3] = 0x80; - break; - - case 13: - w[ 3] = w[ 3] | 0x8000; - break; - - case 14: - w[ 3] = w[ 3] | 0x800000; - break; - - case 15: - w[ 3] = w[ 3] | 0x80000000; - break; - - case 16: - w[ 4] = 0x80; - break; - - case 17: - w[ 4] = w[ 4] | 0x8000; - break; - - case 18: - w[ 4] = w[ 4] | 0x800000; - break; - - case 19: - w[ 4] = w[ 4] | 0x80000000; - break; - - case 20: - w[ 5] = 0x80; - break; - - case 21: - w[ 5] = w[ 5] | 0x8000; - break; - - case 22: - w[ 5] = w[ 5] | 0x800000; - break; - - case 23: - w[ 5] = w[ 5] | 0x80000000; - break; - - case 24: - w[ 6] = 0x80; - break; - - case 25: - w[ 6] = w[ 6] | 0x8000; - break; - - case 26: - w[ 6] = w[ 6] | 0x800000; - break; - - case 27: - w[ 6] = w[ 6] | 0x80000000; - break; - - case 28: - w[ 7] = 0x80; - break; - - case 29: - w[ 7] = w[ 7] | 0x8000; - break; - - case 30: - w[ 7] = w[ 7] | 0x800000; - break; - - case 31: - w[ 7] = w[ 7] | 0x80000000; - break; - - case 32: - w[ 8] = 0x80; - break; - - case 33: - w[ 8] = w[ 8] | 0x8000; - break; - - case 34: - w[ 8] = w[ 8] | 0x800000; - break; - - case 35: - w[ 8] = w[ 8] | 0x80000000; - break; - - case 36: - w[ 9] = 0x80; 
- break; - - case 37: - w[ 9] = w[ 9] | 0x8000; - break; - - case 38: - w[ 9] = w[ 9] | 0x800000; - break; - - case 39: - w[ 9] = w[ 9] | 0x80000000; - break; - - case 40: - w[10] = 0x80; - break; - - case 41: - w[10] = w[10] | 0x8000; - break; - - case 42: - w[10] = w[10] | 0x800000; - break; - - case 43: - w[10] = w[10] | 0x80000000; - break; - - case 44: - w[11] = 0x80; - break; - - case 45: - w[11] = w[11] | 0x8000; - break; - - case 46: - w[11] = w[11] | 0x800000; - break; - - case 47: - w[11] = w[11] | 0x80000000; - break; - - case 48: - w[12] = 0x80; - break; - - case 49: - w[12] = w[12] | 0x8000; - break; - - case 50: - w[12] = w[12] | 0x800000; - break; - - case 51: - w[12] = w[12] | 0x80000000; - break; - - case 52: - w[13] = 0x80; - break; - - case 53: - w[13] = w[13] | 0x8000; - break; - - case 54: - w[13] = w[13] | 0x800000; - break; - - case 55: - w[13] = w[13] | 0x80000000; - break; - - case 56: - w[14] = 0x80; - break; - - case 57: - w[14] = w[14] | 0x8000; - break; - - case 58: - w[14] = w[14] | 0x800000; - break; - - case 59: - w[14] = w[14] | 0x80000000; - break; - - case 60: - w[15] = 0x80; - break; - - case 61: - w[15] = w[15] | 0x8000; - break; - - case 62: - w[15] = w[15] | 0x800000; - break; - - case 63: - w[15] = w[15] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_8 (u32 w[32], const u32 offset) -{ - switch (offset) - { - case 0: - w[ 0] = 0x80; - break; - - case 1: - w[ 0] = w[ 0] | 0x8000; - break; - - case 2: - w[ 0] = w[ 0] | 0x800000; - break; - - case 3: - w[ 0] = w[ 0] | 0x80000000; - break; - - case 4: - w[ 1] = 0x80; - break; - - case 5: - w[ 1] = w[ 1] | 0x8000; - break; - - case 6: - w[ 1] = w[ 1] | 0x800000; - break; - - case 7: - w[ 1] = w[ 1] | 0x80000000; - break; - - case 8: - w[ 2] = 0x80; - break; - - case 9: - w[ 2] = w[ 2] | 0x8000; - break; - - case 10: - w[ 2] = w[ 2] | 0x800000; - break; - - case 11: - w[ 2] = w[ 2] | 0x80000000; - break; - - case 12: - w[ 3] = 0x80; - break; - - case 13: - w[ 3] 
= w[ 3] | 0x8000; - break; - - case 14: - w[ 3] = w[ 3] | 0x800000; - break; - - case 15: - w[ 3] = w[ 3] | 0x80000000; - break; - - case 16: - w[ 4] = 0x80; - break; - - case 17: - w[ 4] = w[ 4] | 0x8000; - break; - - case 18: - w[ 4] = w[ 4] | 0x800000; - break; - - case 19: - w[ 4] = w[ 4] | 0x80000000; - break; - - case 20: - w[ 5] = 0x80; - break; - - case 21: - w[ 5] = w[ 5] | 0x8000; - break; - - case 22: - w[ 5] = w[ 5] | 0x800000; - break; - - case 23: - w[ 5] = w[ 5] | 0x80000000; - break; - - case 24: - w[ 6] = 0x80; - break; - - case 25: - w[ 6] = w[ 6] | 0x8000; - break; - - case 26: - w[ 6] = w[ 6] | 0x800000; - break; - - case 27: - w[ 6] = w[ 6] | 0x80000000; - break; - - case 28: - w[ 7] = 0x80; - break; - - case 29: - w[ 7] = w[ 7] | 0x8000; - break; - - case 30: - w[ 7] = w[ 7] | 0x800000; - break; - - case 31: - w[ 7] = w[ 7] | 0x80000000; - break; - - case 32: - w[ 8] = 0x80; - break; - - case 33: - w[ 8] = w[ 8] | 0x8000; - break; - - case 34: - w[ 8] = w[ 8] | 0x800000; - break; - - case 35: - w[ 8] = w[ 8] | 0x80000000; - break; - - case 36: - w[ 9] = 0x80; - break; - - case 37: - w[ 9] = w[ 9] | 0x8000; - break; - - case 38: - w[ 9] = w[ 9] | 0x800000; - break; - - case 39: - w[ 9] = w[ 9] | 0x80000000; - break; - - case 40: - w[10] = 0x80; - break; - - case 41: - w[10] = w[10] | 0x8000; - break; - - case 42: - w[10] = w[10] | 0x800000; - break; - - case 43: - w[10] = w[10] | 0x80000000; - break; - - case 44: - w[11] = 0x80; - break; - - case 45: - w[11] = w[11] | 0x8000; - break; - - case 46: - w[11] = w[11] | 0x800000; - break; - - case 47: - w[11] = w[11] | 0x80000000; - break; - - case 48: - w[12] = 0x80; - break; - - case 49: - w[12] = w[12] | 0x8000; - break; - - case 50: - w[12] = w[12] | 0x800000; - break; - - case 51: - w[12] = w[12] | 0x80000000; - break; - - case 52: - w[13] = 0x80; - break; - - case 53: - w[13] = w[13] | 0x8000; - break; - - case 54: - w[13] = w[13] | 0x800000; - break; - - case 55: - w[13] = w[13] | 0x80000000; 
- break; - - case 56: - w[14] = 0x80; - break; - - case 57: - w[14] = w[14] | 0x8000; - break; - - case 58: - w[14] = w[14] | 0x800000; - break; - - case 59: - w[14] = w[14] | 0x80000000; - break; - - case 60: - w[15] = 0x80; - break; - - case 61: - w[15] = w[15] | 0x8000; - break; - - case 62: - w[15] = w[15] | 0x800000; - break; - - case 63: - w[15] = w[15] | 0x80000000; - break; - - case 64: - w[16] = 0x80; - break; - - case 65: - w[16] = w[16] | 0x8000; - break; - - case 66: - w[16] = w[16] | 0x800000; - break; - - case 67: - w[16] = w[16] | 0x80000000; - break; - - case 68: - w[17] = 0x80; - break; - - case 69: - w[17] = w[17] | 0x8000; - break; - - case 70: - w[17] = w[17] | 0x800000; - break; - - case 71: - w[17] = w[17] | 0x80000000; - break; - - case 72: - w[18] = 0x80; - break; - - case 73: - w[18] = w[18] | 0x8000; - break; - - case 74: - w[18] = w[18] | 0x800000; - break; - - case 75: - w[18] = w[18] | 0x80000000; - break; - - case 76: - w[19] = 0x80; - break; - - case 77: - w[19] = w[19] | 0x8000; - break; - - case 78: - w[19] = w[19] | 0x800000; - break; - - case 79: - w[19] = w[19] | 0x80000000; - break; - - case 80: - w[20] = 0x80; - break; - - case 81: - w[20] = w[20] | 0x8000; - break; - - case 82: - w[20] = w[20] | 0x800000; - break; - - case 83: - w[20] = w[20] | 0x80000000; - break; - - case 84: - w[21] = 0x80; - break; - - case 85: - w[21] = w[21] | 0x8000; - break; - - case 86: - w[21] = w[21] | 0x800000; - break; - - case 87: - w[21] = w[21] | 0x80000000; - break; - - case 88: - w[22] = 0x80; - break; - - case 89: - w[22] = w[22] | 0x8000; - break; - - case 90: - w[22] = w[22] | 0x800000; - break; - - case 91: - w[22] = w[22] | 0x80000000; - break; - - case 92: - w[23] = 0x80; - break; - - case 93: - w[23] = w[23] | 0x8000; - break; - - case 94: - w[23] = w[23] | 0x800000; - break; - - case 95: - w[23] = w[23] | 0x80000000; - break; - - case 96: - w[24] = 0x80; - break; - - case 97: - w[24] = w[24] | 0x8000; - break; - - case 98: - w[24] = 
w[24] | 0x800000; - break; - - case 99: - w[24] = w[24] | 0x80000000; - break; - - case 100: - w[25] = 0x80; - break; - - case 101: - w[25] = w[25] | 0x8000; - break; - - case 102: - w[25] = w[25] | 0x800000; - break; - - case 103: - w[25] = w[25] | 0x80000000; - break; - - case 104: - w[26] = 0x80; - break; - - case 105: - w[26] = w[26] | 0x8000; - break; - - case 106: - w[26] = w[26] | 0x800000; - break; - - case 107: - w[26] = w[26] | 0x80000000; - break; - - case 108: - w[27] = 0x80; - break; - - case 109: - w[27] = w[27] | 0x8000; - break; - - case 110: - w[27] = w[27] | 0x800000; - break; - - case 111: - w[27] = w[27] | 0x80000000; - break; - - case 112: - w[28] = 0x80; - break; - - case 113: - w[28] = w[28] | 0x8000; - break; - - case 114: - w[28] = w[28] | 0x800000; - break; - - case 115: - w[28] = w[28] | 0x80000000; - break; - - case 116: - w[29] = 0x80; - break; - - case 117: - w[29] = w[29] | 0x8000; - break; - - case 118: - w[29] = w[29] | 0x800000; - break; - - case 119: - w[29] = w[29] | 0x80000000; - break; - - case 120: - w[30] = 0x80; - break; - - case 121: - w[30] = w[30] | 0x8000; - break; - - case 122: - w[30] = w[30] | 0x800000; - break; - - case 123: - w[30] = w[30] | 0x80000000; - break; - - case 124: - w[31] = 0x80; - break; - - case 125: - w[31] = w[31] | 0x8000; - break; - - case 126: - w[31] = w[31] | 0x800000; - break; - - case 127: - w[31] = w[31] | 0x80000000; - break; - } -} - -__device__ static void device_memcat2L (const u32 offset, u32 dst0[2], u32 src_l0[2], u32 src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - 
dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat4L (const u32 offset, u32 dst0[4], u32 src_l0[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | 
src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat8L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 src_l0[4], u32 src_l1[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = 
src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 
24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat12L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - 
dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] 
= src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | 
src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | 
src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat12L (const u32 offset, u32 dst0[4], u32 dst1[4], u32 dst2[4], u32 src_l0[4], u32 src_l1[4], u32 src_l2[4], u32 src_r0[4], u32 src_r1[4]) -{ - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - 
dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[0] = src_r1[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[0] = src_r1[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[0] = src_r1[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - dst2[0] = src_r1[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[1] = src_r1[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - 
dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[1] = src_r1[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[1] = src_r1[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - dst2[0] = src_r1[2]; - dst2[1] = src_r1[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[0] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[2] = src_r1[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[2] = src_r1[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8 | src_r1[0] 
<< 24; - dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[2] = src_r1[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - dst2[0] = src_r1[1]; - dst2[1] = src_r1[2]; - dst2[2] = src_r1[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[3] = src_r1[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[3] = src_r1[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[3] = src_r1[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - dst2[0] = src_r1[0]; - dst2[1] = src_r1[1]; - dst2[2] = src_r1[2]; - dst2[3] = src_r1[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = 
src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24; - break; - - case 20: - dst1[1] = src_r1[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - dst2[1] = src_r1[0]; - dst2[2] = src_r1[1]; - dst2[3] = src_r1[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | 
src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24; - break; - - case 24: - dst1[2] = src_r1[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - dst2[2] = src_r1[0]; - dst2[3] = src_r1[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[3] = src_r1[0] >> 8 | src_r1[1] << 24; - break; - - case 28: - dst1[3] = src_r1[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - dst2[3] = src_r1[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = 
src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] 
= src_l2[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void memcat16_9 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = 
append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - 
w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; 
- w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -__device__ static void memcat32_8 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] 
<< 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = 
append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | 
append1[3] << 16; - w2[3] = append1[3] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = 
append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = 
append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - 
} -} - -__device__ static void memcat32_9 (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - 
- case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - 
w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 
16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - w3[0] = append2[0]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24 | append2[0] << 8; - w3[1] = append2[0] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16 | append2[0] << 16; - w3[1] = append2[0] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 
24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8 | append2[0] << 24; - w3[1] = append2[0] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - w3[1] = append2[0]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24 | append2[0] << 8; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16 | append2[0] << 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8 | append2[0] << 24; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 
24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | 
append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -__device__ static void switch_buffer_by_offset (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (offset % 4); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[0], w3[1], selector); - w3[0] = __byte_perm (w2[3], w3[0], selector); - w2[3] = __byte_perm (w2[2], w2[3], selector); - w2[2] = __byte_perm (w2[1], w2[2], selector); - w2[1] = __byte_perm (w2[0], w2[1], selector); - w2[0] = __byte_perm (w1[3], w2[0], selector); - w1[3] = __byte_perm (w1[2], w1[3], selector); - w1[2] = __byte_perm (w1[1], w1[2], selector); - w1[1] = __byte_perm (w1[0], w1[1], selector); - w1[0] = __byte_perm (w0[3], w1[0], selector); - w0[3] = __byte_perm (w0[2], w0[3], selector); - w0[2] = __byte_perm (w0[1], w0[2], selector); - w0[1] = __byte_perm (w0[0], w0[1], selector); - w0[0] = __byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[1] = __byte_perm (w2[3], w3[0], selector); - w3[0] = __byte_perm (w2[2], w2[3], selector); - w2[3] = __byte_perm (w2[1], w2[2], selector); - w2[2] = __byte_perm (w2[0], w2[1], selector); - w2[1] = __byte_perm (w1[3], w2[0], selector); - w2[0] = __byte_perm (w1[2], w1[3], selector); - w1[3] = __byte_perm (w1[1], w1[2], selector); - w1[2] = __byte_perm (w1[0], w1[1], selector); - w1[1] = __byte_perm (w0[3], w1[0], selector); - w1[0] = __byte_perm (w0[2], w0[3], selector); - w0[3] = __byte_perm (w0[1], w0[2], selector); - w0[2] = __byte_perm (w0[0], w0[1], selector); - w0[1] = __byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[1] = __byte_perm 
(w2[2], w2[3], selector); - w3[0] = __byte_perm (w2[1], w2[2], selector); - w2[3] = __byte_perm (w2[0], w2[1], selector); - w2[2] = __byte_perm (w1[3], w2[0], selector); - w2[1] = __byte_perm (w1[2], w1[3], selector); - w2[0] = __byte_perm (w1[1], w1[2], selector); - w1[3] = __byte_perm (w1[0], w1[1], selector); - w1[2] = __byte_perm (w0[3], w1[0], selector); - w1[1] = __byte_perm (w0[2], w0[3], selector); - w1[0] = __byte_perm (w0[1], w0[2], selector); - w0[3] = __byte_perm (w0[0], w0[1], selector); - w0[2] = __byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[1] = __byte_perm (w2[1], w2[2], selector); - w3[0] = __byte_perm (w2[0], w2[1], selector); - w2[3] = __byte_perm (w1[3], w2[0], selector); - w2[2] = __byte_perm (w1[2], w1[3], selector); - w2[1] = __byte_perm (w1[1], w1[2], selector); - w2[0] = __byte_perm (w1[0], w1[1], selector); - w1[3] = __byte_perm (w0[3], w1[0], selector); - w1[2] = __byte_perm (w0[2], w0[3], selector); - w1[1] = __byte_perm (w0[1], w0[2], selector); - w1[0] = __byte_perm (w0[0], w0[1], selector); - w0[3] = __byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[1] = __byte_perm (w2[0], w2[1], selector); - w3[0] = __byte_perm (w1[3], w2[0], selector); - w2[3] = __byte_perm (w1[2], w1[3], selector); - w2[2] = __byte_perm (w1[1], w1[2], selector); - w2[1] = __byte_perm (w1[0], w1[1], selector); - w2[0] = __byte_perm (w0[3], w1[0], selector); - w1[3] = __byte_perm (w0[2], w0[3], selector); - w1[2] = __byte_perm (w0[1], w0[2], selector); - w1[1] = __byte_perm (w0[0], w0[1], selector); - w1[0] = __byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[1] = __byte_perm (w1[3], w2[0], selector); - w3[0] = __byte_perm (w1[2], w1[3], selector); - w2[3] = __byte_perm (w1[1], w1[2], selector); - w2[2] = __byte_perm (w1[0], w1[1], selector); - w2[1] = __byte_perm (w0[3], w1[0], selector); - w2[0] = 
__byte_perm (w0[2], w0[3], selector); - w1[3] = __byte_perm (w0[1], w0[2], selector); - w1[2] = __byte_perm (w0[0], w0[1], selector); - w1[1] = __byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[1] = __byte_perm (w1[2], w1[3], selector); - w3[0] = __byte_perm (w1[1], w1[2], selector); - w2[3] = __byte_perm (w1[0], w1[1], selector); - w2[2] = __byte_perm (w0[3], w1[0], selector); - w2[1] = __byte_perm (w0[2], w0[3], selector); - w2[0] = __byte_perm (w0[1], w0[2], selector); - w1[3] = __byte_perm (w0[0], w0[1], selector); - w1[2] = __byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[1] = __byte_perm (w1[1], w1[2], selector); - w3[0] = __byte_perm (w1[0], w1[1], selector); - w2[3] = __byte_perm (w0[3], w1[0], selector); - w2[2] = __byte_perm (w0[2], w0[3], selector); - w2[1] = __byte_perm (w0[1], w0[2], selector); - w2[0] = __byte_perm (w0[0], w0[1], selector); - w1[3] = __byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[1] = __byte_perm (w1[0], w1[1], selector); - w3[0] = __byte_perm (w0[3], w1[0], selector); - w2[3] = __byte_perm (w0[2], w0[3], selector); - w2[2] = __byte_perm (w0[1], w0[2], selector); - w2[1] = __byte_perm (w0[0], w0[1], selector); - w2[0] = __byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[1] = __byte_perm (w0[3], w1[0], selector); - w3[0] = __byte_perm (w0[2], w0[3], selector); - w2[3] = __byte_perm (w0[1], w0[2], selector); - w2[2] = __byte_perm (w0[0], w0[1], selector); - w2[1] = __byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[1] = 
__byte_perm (w0[2], w0[3], selector); - w3[0] = __byte_perm (w0[1], w0[2], selector); - w2[3] = __byte_perm (w0[0], w0[1], selector); - w2[2] = __byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[1] = __byte_perm (w0[1], w0[2], selector); - w3[0] = __byte_perm (w0[0], w0[1], selector); - w2[3] = __byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[1] = __byte_perm (w0[0], w0[1], selector); - w3[0] = __byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[1] = __byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - - #else - - u32 tmp0[4]; - u32 tmp1[4]; - u32 tmp2[1]; - - switch (offset % 4) - { - case 0: - tmp0[0] = w0[0]; - tmp0[1] = w0[1]; - tmp0[2] = w0[2]; - tmp0[3] = w0[3]; - tmp1[0] = w1[0]; - tmp1[1] = w1[1]; - tmp1[2] = w1[2]; - tmp1[3] = w1[3]; - tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] 
>> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; - } - - switch (offset / 4) - { - case 0: - w0[0] = tmp0[0]; - w0[1] = tmp0[1]; - w0[2] = tmp0[2]; - w0[3] = tmp0[3]; - w1[0] = tmp1[0]; - w1[1] = tmp1[1]; - w1[2] = tmp1[2]; - w1[3] = tmp1[3]; - w2[0] = tmp2[0]; - break; - - case 1: - w0[0] = 0; - w0[1] = tmp0[0]; - w0[2] = tmp0[1]; - w0[3] = tmp0[2]; - w1[0] = tmp0[3]; - w1[1] = tmp1[0]; - w1[2] = tmp1[1]; - w1[3] = tmp1[2]; - w2[0] = tmp1[3]; - w2[1] = tmp2[0]; - break; - - case 2: - w0[0] = 0; - w0[1] = 0; - w0[2] = tmp0[0]; - w0[3] = tmp0[1]; - w1[0] = tmp0[2]; - w1[1] = tmp0[3]; - w1[2] = tmp1[0]; - w1[3] = tmp1[1]; - w2[0] = tmp1[2]; - w2[1] = tmp1[3]; - w2[2] = tmp2[0]; - break; - - case 3: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = tmp0[0]; - w1[0] = tmp0[1]; - w1[1] = tmp0[2]; - w1[2] = tmp0[3]; - w1[3] = tmp1[0]; - w2[0] = tmp1[1]; - w2[1] = tmp1[2]; - w2[2] = tmp1[3]; - w2[3] = tmp2[0]; - break; - - case 4: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = tmp0[0]; - w1[1] = tmp0[1]; - w1[2] = tmp0[2]; - w1[3] = tmp0[3]; - w2[0] = tmp1[0]; - w2[1] = tmp1[1]; - w2[2] = tmp1[2]; - w2[3] = tmp1[3]; - w3[0] = tmp2[0]; - break; - - case 5: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = tmp0[0]; - w1[2] = tmp0[1]; - w1[3] = tmp0[2]; - w2[0] = tmp0[3]; - w2[1] = tmp1[0]; - w2[2] = tmp1[1]; - w2[3] = tmp1[2]; - w3[0] = tmp1[3]; - w3[1] = tmp2[0]; - break; - - case 6: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = tmp0[0]; - w1[3] = tmp0[1]; - w2[0] = tmp0[2]; - w2[1] = tmp0[3]; - w2[2] = tmp1[0]; - w2[3] 
= tmp1[1]; - w3[0] = tmp1[2]; - w3[1] = tmp1[3]; - w3[2] = tmp2[0]; - break; - - case 7: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = tmp0[0]; - w2[0] = tmp0[1]; - w2[1] = tmp0[2]; - w2[2] = tmp0[3]; - w2[3] = tmp1[0]; - w3[0] = tmp1[1]; - w3[1] = tmp1[2]; - w3[2] = tmp1[3]; - w3[3] = tmp2[0]; - break; - - case 8: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = tmp0[0]; - w2[1] = tmp0[1]; - w2[2] = tmp0[2]; - w2[3] = tmp0[3]; - w3[0] = tmp1[0]; - w3[1] = tmp1[1]; - w3[2] = tmp1[2]; - w3[3] = tmp1[3]; - break; - - case 9: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = tmp0[0]; - w2[2] = tmp0[1]; - w2[3] = tmp0[2]; - w3[0] = tmp0[3]; - w3[1] = tmp1[0]; - w3[2] = tmp1[1]; - w3[3] = tmp1[2]; - break; - - case 10: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = tmp0[0]; - w2[3] = tmp0[1]; - w3[0] = tmp0[2]; - w3[1] = tmp0[3]; - w3[2] = tmp1[0]; - w3[3] = tmp1[1]; - break; - - case 11: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = tmp0[0]; - w3[0] = tmp0[1]; - w3[1] = tmp0[2]; - w3[2] = tmp0[3]; - w3[3] = tmp1[0]; - break; - - case 12: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = tmp0[0]; - w3[1] = tmp0[1]; - w3[2] = tmp0[2]; - w3[3] = tmp0[3]; - break; - - case 13: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = tmp0[0]; - w3[2] = tmp0[1]; - w3[3] = tmp0[2]; - break; - - } - - #endif -} - -__device__ static void 
switch_buffer_by_offset_be (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) -{ - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[1], w3[0], selector); - w3[0] = __byte_perm (w3[0], w2[3], selector); - w2[3] = __byte_perm (w2[3], w2[2], selector); - w2[2] = __byte_perm (w2[2], w2[1], selector); - w2[1] = __byte_perm (w2[1], w2[0], selector); - w2[0] = __byte_perm (w2[0], w1[3], selector); - w1[3] = __byte_perm (w1[3], w1[2], selector); - w1[2] = __byte_perm (w1[2], w1[1], selector); - w1[1] = __byte_perm (w1[1], w1[0], selector); - w1[0] = __byte_perm (w1[0], w0[3], selector); - w0[3] = __byte_perm (w0[3], w0[2], selector); - w0[2] = __byte_perm (w0[2], w0[1], selector); - w0[1] = __byte_perm (w0[1], w0[0], selector); - w0[0] = __byte_perm (w0[0], 0, selector); - break; - - case 1: - w3[1] = __byte_perm (w3[0], w2[3], selector); - w3[0] = __byte_perm (w2[3], w2[2], selector); - w2[3] = __byte_perm (w2[2], w2[1], selector); - w2[2] = __byte_perm (w2[1], w2[0], selector); - w2[1] = __byte_perm (w2[0], w1[3], selector); - w2[0] = __byte_perm (w1[3], w1[2], selector); - w1[3] = __byte_perm (w1[2], w1[1], selector); - w1[2] = __byte_perm (w1[1], w1[0], selector); - w1[1] = __byte_perm (w1[0], w0[3], selector); - w1[0] = __byte_perm (w0[3], w0[2], selector); - w0[3] = __byte_perm (w0[2], w0[1], selector); - w0[2] = __byte_perm (w0[1], w0[0], selector); - w0[1] = __byte_perm (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w3[1] = __byte_perm (w2[3], w2[2], selector); - w3[0] = __byte_perm (w2[2], w2[1], selector); - w2[3] = __byte_perm (w2[1], w2[0], selector); - w2[2] = __byte_perm (w2[0], w1[3], selector); - w2[1] = __byte_perm (w1[3], w1[2], selector); - w2[0] = __byte_perm (w1[2], w1[1], selector); - w1[3] = __byte_perm (w1[1], w1[0], selector); - w1[2] = __byte_perm (w1[0], w0[3], selector); - w1[1] = __byte_perm (w0[3], w0[2], selector); - w1[0] = __byte_perm 
(w0[2], w0[1], selector); - w0[3] = __byte_perm (w0[1], w0[0], selector); - w0[2] = __byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w3[1] = __byte_perm (w2[2], w2[1], selector); - w3[0] = __byte_perm (w2[1], w2[0], selector); - w2[3] = __byte_perm (w2[0], w1[3], selector); - w2[2] = __byte_perm (w1[3], w1[2], selector); - w2[1] = __byte_perm (w1[2], w1[1], selector); - w2[0] = __byte_perm (w1[1], w1[0], selector); - w1[3] = __byte_perm (w1[0], w0[3], selector); - w1[2] = __byte_perm (w0[3], w0[2], selector); - w1[1] = __byte_perm (w0[2], w0[1], selector); - w1[0] = __byte_perm (w0[1], w0[0], selector); - w0[3] = __byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w3[1] = __byte_perm (w2[1], w2[0], selector); - w3[0] = __byte_perm (w2[0], w1[3], selector); - w2[3] = __byte_perm (w1[3], w1[2], selector); - w2[2] = __byte_perm (w1[2], w1[1], selector); - w2[1] = __byte_perm (w1[1], w1[0], selector); - w2[0] = __byte_perm (w1[0], w0[3], selector); - w1[3] = __byte_perm (w0[3], w0[2], selector); - w1[2] = __byte_perm (w0[2], w0[1], selector); - w1[1] = __byte_perm (w0[1], w0[0], selector); - w1[0] = __byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w3[1] = __byte_perm (w2[0], w1[3], selector); - w3[0] = __byte_perm (w1[3], w1[2], selector); - w2[3] = __byte_perm (w1[2], w1[1], selector); - w2[2] = __byte_perm (w1[1], w1[0], selector); - w2[1] = __byte_perm (w1[0], w0[3], selector); - w2[0] = __byte_perm (w0[3], w0[2], selector); - w1[3] = __byte_perm (w0[2], w0[1], selector); - w1[2] = __byte_perm (w0[1], w0[0], selector); - w1[1] = __byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w3[1] = __byte_perm (w1[3], w1[2], selector); - w3[0] = __byte_perm (w1[2], w1[1], selector); - w2[3] = __byte_perm (w1[1], w1[0], selector); - w2[2] = __byte_perm (w1[0], w0[3], 
selector); - w2[1] = __byte_perm (w0[3], w0[2], selector); - w2[0] = __byte_perm (w0[2], w0[1], selector); - w1[3] = __byte_perm (w0[1], w0[0], selector); - w1[2] = __byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w3[1] = __byte_perm (w1[2], w1[1], selector); - w3[0] = __byte_perm (w1[1], w1[0], selector); - w2[3] = __byte_perm (w1[0], w0[3], selector); - w2[2] = __byte_perm (w0[3], w0[2], selector); - w2[1] = __byte_perm (w0[2], w0[1], selector); - w2[0] = __byte_perm (w0[1], w0[0], selector); - w1[3] = __byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w3[1] = __byte_perm (w1[1], w1[0], selector); - w3[0] = __byte_perm (w1[0], w0[3], selector); - w2[3] = __byte_perm (w0[3], w0[2], selector); - w2[2] = __byte_perm (w0[2], w0[1], selector); - w2[1] = __byte_perm (w0[1], w0[0], selector); - w2[0] = __byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w3[1] = __byte_perm (w1[0], w0[3], selector); - w3[0] = __byte_perm (w0[3], w0[2], selector); - w2[3] = __byte_perm (w0[2], w0[1], selector); - w2[2] = __byte_perm (w0[1], w0[0], selector); - w2[1] = __byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w3[1] = __byte_perm (w0[3], w0[2], selector); - w3[0] = __byte_perm (w0[2], w0[1], selector); - w2[3] = __byte_perm (w0[1], w0[0], selector); - w2[2] = __byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w3[1] = __byte_perm (w0[2], w0[1], selector); - w3[0] = __byte_perm (w0[1], w0[0], selector); - w2[3] = __byte_perm (w0[0], 0, 
selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w3[1] = __byte_perm (w0[1], w0[0], selector); - w3[0] = __byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w3[1] = __byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } -} - -/** - * vector - */ - -#ifndef VECT_SIZE1 -__device__ static u32x swap_workaround (const u32x v) -{ - #if __CUDA_ARCH__ >= 200 - return __byte_perm (v, 0, 0x0123); - - #else - return (v << 24) + ((v & 0x0000FF00) << 8) + ((v & 0x00FF0000) >> 8) + (v >> 24); - - #endif -} - -__device__ static u64x swap_workaround (const u64x v) -{ - return (((v & 0xff00000000000000) >> 56) - | ((v & 0x00ff000000000000) >> 40) - | ((v & 0x0000ff0000000000) >> 24) - | ((v & 0x000000ff00000000) >> 8) - | ((v & 0x00000000ff000000) << 8) - | ((v & 0x0000000000ff0000) << 24) - | ((v & 0x000000000000ff00) << 40) - | ((v & 0x00000000000000ff) << 56)); -} - -__device__ static void truncate_block (u32x w[4], const u32 len) -{ - switch (len) - { - case 0: w[0] &= 0; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 1: w[0] &= 0x000000FF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 2: w[0] &= 0x0000FFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 3: w[0] &= 0x00FFFFFF; - w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 4: w[1] &= 0; - w[2] &= 0; - w[3] &= 0; - break; - case 5: w[1] &= 0x000000FF; - w[2] &= 0; - w[3] &= 0; - break; - case 6: w[1] &= 0x0000FFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 7: w[1] &= 0x00FFFFFF; - w[2] &= 0; - w[3] &= 0; - break; - case 8: w[2] &= 0; - w[3] &= 0; - 
break; - case 9: w[2] &= 0x000000FF; - w[3] &= 0; - break; - case 10: w[2] &= 0x0000FFFF; - w[3] &= 0; - break; - case 11: w[2] &= 0x00FFFFFF; - w[3] &= 0; - break; - case 12: w[3] &= 0; - break; - case 13: w[3] &= 0x000000FF; - break; - case 14: w[3] &= 0x0000FFFF; - break; - case 15: w[3] &= 0x00FFFFFF; - break; - } -} - -__device__ static void make_unicode (const u32x in[4], u32x out1[4], u32x out2[4]) -{ - #if __CUDA_ARCH__ >= 200 - out2[3] = __byte_perm (in[3], 0, 0x7372); - out2[2] = __byte_perm (in[3], 0, 0x7170); - out2[1] = __byte_perm (in[2], 0, 0x7372); - out2[0] = __byte_perm (in[2], 0, 0x7170); - out1[3] = __byte_perm (in[1], 0, 0x7372); - out1[2] = __byte_perm (in[1], 0, 0x7170); - out1[1] = __byte_perm (in[0], 0, 0x7372); - out1[0] = __byte_perm (in[0], 0, 0x7170); - #else - out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF); - out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF); - out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF); - out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF); - out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF); - out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF); - out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF); - out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF); - #endif -} - -__device__ static void append_0x01_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] 
= w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: 
- w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; 
- break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 
0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x01_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x01; - break; - - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = 
w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - 
- case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - - case 64: - w4[0] = 0x01; - break; - - case 65: - w4[0] = w4[0] | 0x0100; - break; - - case 66: - w4[0] = w4[0] | 0x010000; - break; - - case 67: - w4[0] = w4[0] | 0x01000000; - break; - - case 68: - w4[1] = 0x01; - break; - - case 69: - w4[1] = w4[1] | 0x0100; - break; - - case 70: - w4[1] = w4[1] | 0x010000; - break; - - case 71: - w4[1] = w4[1] | 0x01000000; - break; - - case 72: - w4[2] = 0x01; - break; - - case 73: - w4[2] = w4[2] | 0x0100; - break; - - case 74: - w4[2] = w4[2] | 0x010000; - break; - - case 75: - w4[2] = w4[2] | 0x01000000; - break; - - case 76: - w4[3] = 0x01; - break; - - case 77: - w4[3] = w4[3] | 0x0100; - break; - - case 78: - w4[3] = w4[3] | 0x010000; - break; - - case 79: - w4[3] = w4[3] | 0x01000000; - break; - - case 80: - w5[0] = 0x01; - break; - - case 81: - w5[0] = w5[0] | 0x0100; - break; - - case 82: - w5[0] = w5[0] | 0x010000; - break; - - case 83: - w5[0] = w5[0] | 0x01000000; - break; - - case 84: - w5[1] = 0x01; - break; - - case 85: - w5[1] = w5[1] | 0x0100; - break; - - case 86: - w5[1] = w5[1] | 0x010000; - break; - - case 87: - w5[1] = w5[1] | 0x01000000; - break; - - case 88: - w5[2] = 0x01; - break; - - case 89: - w5[2] = w5[2] | 0x0100; - break; - - case 90: - w5[2] = w5[2] | 0x010000; - break; - - case 91: - w5[2] = w5[2] | 0x01000000; - break; - - case 92: - w5[3] = 0x01; - break; - - case 93: - w5[3] = w5[3] | 0x0100; - break; - - case 94: - w5[3] = w5[3] | 0x010000; - break; - - case 95: - w5[3] = 
w5[3] | 0x01000000; - break; - - case 96: - w6[0] = 0x01; - break; - - case 97: - w6[0] = w6[0] | 0x0100; - break; - - case 98: - w6[0] = w6[0] | 0x010000; - break; - - case 99: - w6[0] = w6[0] | 0x01000000; - break; - - case 100: - w6[1] = 0x01; - break; - - case 101: - w6[1] = w6[1] | 0x0100; - break; - - case 102: - w6[1] = w6[1] | 0x010000; - break; - - case 103: - w6[1] = w6[1] | 0x01000000; - break; - - case 104: - w6[2] = 0x01; - break; - - case 105: - w6[2] = w6[2] | 0x0100; - break; - - case 106: - w6[2] = w6[2] | 0x010000; - break; - - case 107: - w6[2] = w6[2] | 0x01000000; - break; - - case 108: - w6[3] = 0x01; - break; - - case 109: - w6[3] = w6[3] | 0x0100; - break; - - case 110: - w6[3] = w6[3] | 0x010000; - break; - - case 111: - w6[3] = w6[3] | 0x01000000; - break; - - case 112: - w7[0] = 0x01; - break; - - case 113: - w7[0] = w7[0] | 0x0100; - break; - - case 114: - w7[0] = w7[0] | 0x010000; - break; - - case 115: - w7[0] = w7[0] | 0x01000000; - break; - - case 116: - w7[1] = 0x01; - break; - - case 117: - w7[1] = w7[1] | 0x0100; - break; - - case 118: - w7[1] = w7[1] | 0x010000; - break; - - case 119: - w7[1] = w7[1] | 0x01000000; - break; - - case 120: - w7[2] = 0x01; - break; - - case 121: - w7[2] = w7[2] | 0x0100; - break; - - case 122: - w7[2] = w7[2] | 0x010000; - break; - - case 123: - w7[2] = w7[2] | 0x01000000; - break; - - case 124: - w7[3] = 0x01; - break; - - case 125: - w7[3] = w7[3] | 0x0100; - break; - - case 126: - w7[3] = w7[3] | 0x010000; - break; - - case 127: - w7[3] = w7[3] | 0x01000000; - break; - } -} - -__device__ static void append_0x02_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] 
= w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 
0x02000000; - break; - } -} - -__device__ static void append_0x02_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 
0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = 
w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x02_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x02; - break; - - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - 
w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - 
break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - - case 64: - w4[0] = 0x02; - break; - - case 65: - w4[0] = w4[0] | 0x0200; - break; - - case 66: - w4[0] = w4[0] | 0x020000; - break; - - case 67: - w4[0] = w4[0] | 0x02000000; - break; - - case 68: - w4[1] = 0x02; - break; - - case 69: - w4[1] = w4[1] | 0x0200; - break; - - case 70: - w4[1] = w4[1] | 0x020000; - break; - - case 71: - w4[1] = w4[1] | 0x02000000; - break; - - case 72: - w4[2] = 0x02; - break; - - case 73: - w4[2] = w4[2] | 0x0200; - break; - - case 74: - w4[2] = w4[2] | 0x020000; - break; - - case 75: - w4[2] = w4[2] | 0x02000000; - break; - - case 76: - w4[3] = 0x02; - break; - - case 77: - w4[3] = w4[3] | 0x0200; - break; - - case 78: - w4[3] = w4[3] | 0x020000; - break; - - case 79: - w4[3] = w4[3] | 0x02000000; - break; - - case 80: - w5[0] = 0x02; - break; - - case 81: - w5[0] = w5[0] | 0x0200; - break; - - case 82: - w5[0] = w5[0] | 0x020000; - break; - - case 83: - w5[0] = w5[0] | 0x02000000; - break; - - case 84: - w5[1] = 0x02; - break; - - case 85: - w5[1] = w5[1] | 0x0200; - break; - - case 86: - w5[1] = w5[1] | 0x020000; - break; - - case 87: - w5[1] = w5[1] | 0x02000000; - break; - - case 88: - w5[2] = 0x02; - break; - - case 89: - w5[2] = w5[2] | 0x0200; - break; - - case 90: - w5[2] = w5[2] | 0x020000; - break; - - case 91: - 
w5[2] = w5[2] | 0x02000000; - break; - - case 92: - w5[3] = 0x02; - break; - - case 93: - w5[3] = w5[3] | 0x0200; - break; - - case 94: - w5[3] = w5[3] | 0x020000; - break; - - case 95: - w5[3] = w5[3] | 0x02000000; - break; - - case 96: - w6[0] = 0x02; - break; - - case 97: - w6[0] = w6[0] | 0x0200; - break; - - case 98: - w6[0] = w6[0] | 0x020000; - break; - - case 99: - w6[0] = w6[0] | 0x02000000; - break; - - case 100: - w6[1] = 0x02; - break; - - case 101: - w6[1] = w6[1] | 0x0200; - break; - - case 102: - w6[1] = w6[1] | 0x020000; - break; - - case 103: - w6[1] = w6[1] | 0x02000000; - break; - - case 104: - w6[2] = 0x02; - break; - - case 105: - w6[2] = w6[2] | 0x0200; - break; - - case 106: - w6[2] = w6[2] | 0x020000; - break; - - case 107: - w6[2] = w6[2] | 0x02000000; - break; - - case 108: - w6[3] = 0x02; - break; - - case 109: - w6[3] = w6[3] | 0x0200; - break; - - case 110: - w6[3] = w6[3] | 0x020000; - break; - - case 111: - w6[3] = w6[3] | 0x02000000; - break; - - case 112: - w7[0] = 0x02; - break; - - case 113: - w7[0] = w7[0] | 0x0200; - break; - - case 114: - w7[0] = w7[0] | 0x020000; - break; - - case 115: - w7[0] = w7[0] | 0x02000000; - break; - - case 116: - w7[1] = 0x02; - break; - - case 117: - w7[1] = w7[1] | 0x0200; - break; - - case 118: - w7[1] = w7[1] | 0x020000; - break; - - case 119: - w7[1] = w7[1] | 0x02000000; - break; - - case 120: - w7[2] = 0x02; - break; - - case 121: - w7[2] = w7[2] | 0x0200; - break; - - case 122: - w7[2] = w7[2] | 0x020000; - break; - - case 123: - w7[2] = w7[2] | 0x02000000; - break; - - case 124: - w7[3] = 0x02; - break; - - case 125: - w7[3] = w7[3] | 0x0200; - break; - - case 126: - w7[3] = w7[3] | 0x020000; - break; - - case 127: - w7[3] = w7[3] | 0x02000000; - break; - } -} - -__device__ static void append_0x80_1 (u32x w0[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 
3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_2 (u32x w0[4], u32x w1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 
0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_3 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 
0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = 
w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } -} - -__device__ static void append_0x80_8 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = 0x80; - break; - - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 
2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - 
break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - - case 64: - w4[0] = 0x80; - break; - - case 65: - w4[0] = w4[0] | 0x8000; - break; - - case 66: - w4[0] = w4[0] | 0x800000; - break; - - case 67: - w4[0] = w4[0] | 0x80000000; - break; - - case 68: - w4[1] = 0x80; - break; - - case 69: - w4[1] = w4[1] | 0x8000; - break; - - case 70: - w4[1] = w4[1] | 0x800000; - break; - - case 71: - w4[1] = w4[1] | 0x80000000; - break; - - case 72: - w4[2] = 0x80; - break; - - case 73: - w4[2] = w4[2] | 0x8000; - break; - - case 74: - w4[2] = w4[2] | 0x800000; - break; - - case 75: - w4[2] = w4[2] | 0x80000000; - break; - - case 76: - w4[3] = 0x80; - break; - - case 77: - w4[3] = w4[3] | 0x8000; - break; - - case 78: - w4[3] = w4[3] | 0x800000; - break; - - case 79: - w4[3] = w4[3] | 0x80000000; - break; - - case 80: - w5[0] = 0x80; - break; - - case 81: - w5[0] = w5[0] | 0x8000; - break; - - case 82: - w5[0] = w5[0] | 0x800000; - break; - - case 83: - w5[0] = w5[0] | 0x80000000; - break; - - case 84: - w5[1] = 0x80; - break; - - case 85: - w5[1] = w5[1] | 0x8000; - break; - - case 86: - w5[1] = w5[1] | 0x800000; - break; - - case 87: - 
w5[1] = w5[1] | 0x80000000; - break; - - case 88: - w5[2] = 0x80; - break; - - case 89: - w5[2] = w5[2] | 0x8000; - break; - - case 90: - w5[2] = w5[2] | 0x800000; - break; - - case 91: - w5[2] = w5[2] | 0x80000000; - break; - - case 92: - w5[3] = 0x80; - break; - - case 93: - w5[3] = w5[3] | 0x8000; - break; - - case 94: - w5[3] = w5[3] | 0x800000; - break; - - case 95: - w5[3] = w5[3] | 0x80000000; - break; - - case 96: - w6[0] = 0x80; - break; - - case 97: - w6[0] = w6[0] | 0x8000; - break; - - case 98: - w6[0] = w6[0] | 0x800000; - break; - - case 99: - w6[0] = w6[0] | 0x80000000; - break; - - case 100: - w6[1] = 0x80; - break; - - case 101: - w6[1] = w6[1] | 0x8000; - break; - - case 102: - w6[1] = w6[1] | 0x800000; - break; - - case 103: - w6[1] = w6[1] | 0x80000000; - break; - - case 104: - w6[2] = 0x80; - break; - - case 105: - w6[2] = w6[2] | 0x8000; - break; - - case 106: - w6[2] = w6[2] | 0x800000; - break; - - case 107: - w6[2] = w6[2] | 0x80000000; - break; - - case 108: - w6[3] = 0x80; - break; - - case 109: - w6[3] = w6[3] | 0x8000; - break; - - case 110: - w6[3] = w6[3] | 0x800000; - break; - - case 111: - w6[3] = w6[3] | 0x80000000; - break; - - case 112: - w7[0] = 0x80; - break; - - case 113: - w7[0] = w7[0] | 0x8000; - break; - - case 114: - w7[0] = w7[0] | 0x800000; - break; - - case 115: - w7[0] = w7[0] | 0x80000000; - break; - - case 116: - w7[1] = 0x80; - break; - - case 117: - w7[1] = w7[1] | 0x8000; - break; - - case 118: - w7[1] = w7[1] | 0x800000; - break; - - case 119: - w7[1] = w7[1] | 0x80000000; - break; - - case 120: - w7[2] = 0x80; - break; - - case 121: - w7[2] = w7[2] | 0x8000; - break; - - case 122: - w7[2] = w7[2] | 0x800000; - break; - - case 123: - w7[2] = w7[2] | 0x80000000; - break; - - case 124: - w7[3] = 0x80; - break; - - case 125: - w7[3] = w7[3] | 0x8000; - break; - - case 126: - w7[3] = w7[3] | 0x800000; - break; - - case 127: - w7[3] = w7[3] | 0x80000000; - break; - } -} - -__device__ static void device_memcat2L 
(const u32 offset, u32x dst0[2], u32x src_l0[2], u32 src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat2L (const u32 offset, u32x dst0[2], u32x src_l0[2], u32x src_r0[2]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat4L (const u32 offset, u32x dst0[4], u32x src_l0[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | 
src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat4L (const u32 offset, u32x dst0[4], u32x src_l0[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 
4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 12: - dst0[3] = src_r0[0]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat8L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x src_l0[4], u32x src_l1[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = 
src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = 
src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; 
- - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat8L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x src_l0[4], u32x src_l1[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - 
- case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 
16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat12L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32 src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] 
= src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - 
break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | 
src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - 
dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat12L (const u32 
offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32x src_r0[4]) -{ - switch (offset) - { - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24; - break; - - case 10: - dst0[2] = 
src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16; - break; - - case 15: - dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | 
src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8; - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8; - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8; - break; - - case 28: - dst1[3] = src_r0[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24; - 
break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 
16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void device_memcat12L (const u32 offset, u32x dst0[4], u32x dst1[4], u32x dst2[4], u32x src_l0[4], u32x src_l1[4], u32x src_l2[4], u32x src_r0[4], u32x src_r1[4]) -{ - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = src_l0[0] | src_r0[0] << 8; - dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst1[3] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[0] = src_r1[3] >> 24; - break; - - case 2: - dst0[0] = src_l0[0] | src_r0[0] << 16; - dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst1[3] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[0] = src_r1[3] >> 16; - break; - - case 3: - dst0[0] = src_l0[0] | src_r0[0] << 24; - dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst1[3] = src_r1[2] >> 8 | src_r1[3] << 24; 
- dst2[0] = src_r1[3] >> 8; - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - dst2[0] = src_r1[3]; - break; - - case 5: - dst0[1] = src_l0[1] | src_r0[0] << 8; - dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst1[3] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[0] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[1] = src_r1[3] >> 24; - break; - - case 6: - dst0[1] = src_l0[1] | src_r0[0] << 16; - dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst1[3] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[0] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[1] = src_r1[3] >> 16; - break; - - case 7: - dst0[1] = src_l0[1] | src_r0[0] << 24; - dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst1[3] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[0] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[1] = src_r1[3] >> 8; - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - dst2[0] = src_r1[2]; - dst2[1] = src_r1[3]; - break; - - case 9: - dst0[2] = src_l0[2] | src_r0[0] << 8; - dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst1[3] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[0] = src_r1[1] >> 
24 | src_r1[2] << 8; - dst2[1] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[2] = src_r1[3] >> 24; - break; - - case 10: - dst0[2] = src_l0[2] | src_r0[0] << 16; - dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst1[3] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[0] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[1] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[2] = src_r1[3] >> 16; - break; - - case 11: - dst0[2] = src_l0[2] | src_r0[0] << 24; - dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst1[3] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[0] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[1] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[2] = src_r1[3] >> 8; - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - dst2[0] = src_r1[1]; - dst2[1] = src_r1[2]; - dst2[2] = src_r1[3]; - break; - - case 13: - dst0[3] = src_l0[3] | src_r0[0] << 8; - dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst1[3] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[0] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[1] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[2] = src_r1[2] >> 24 | src_r1[3] << 8; - dst2[3] = src_r1[3] >> 24; - break; - - case 14: - dst0[3] = src_l0[3] | src_r0[0] << 16; - dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst1[3] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[0] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[1] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[2] = src_r1[2] >> 16 | src_r1[3] << 16; - dst2[3] = src_r1[3] >> 16; - break; - - case 15: - 
dst0[3] = src_l0[3] | src_r0[0] << 24; - dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst1[3] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[0] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[1] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[2] = src_r1[2] >> 8 | src_r1[3] << 24; - dst2[3] = src_r1[3] >> 8; - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - dst2[0] = src_r1[0]; - dst2[1] = src_r1[1]; - dst2[2] = src_r1[2]; - dst2[3] = src_r1[3]; - break; - - case 17: - dst1[0] = src_l1[0] | src_r0[0] << 8; - dst1[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst1[3] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[0] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[1] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[2] = src_r1[1] >> 24 | src_r1[2] << 8; - dst2[3] = src_r1[2] >> 24 | src_r1[3] << 8; - break; - - case 18: - dst1[0] = src_l1[0] | src_r0[0] << 16; - dst1[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst1[3] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[0] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[1] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[2] = src_r1[1] >> 16 | src_r1[2] << 16; - dst2[3] = src_r1[2] >> 16 | src_r1[3] << 16; - break; - - case 19: - dst1[0] = src_l1[0] | src_r0[0] << 24; - dst1[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst1[3] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[0] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[1] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[2] = src_r1[1] >> 8 | src_r1[2] << 24; - dst2[3] = src_r1[2] >> 8 | src_r1[3] << 24; - break; - - case 20: - dst1[1] = src_r1[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - dst2[0] = src_r0[3]; - dst2[1] = src_r1[0]; - dst2[2] = src_r1[1]; - dst2[3] = src_r1[2]; - break; - - case 21: - dst1[1] = src_l1[1] | src_r0[0] << 
8; - dst1[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst1[3] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[0] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[1] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[2] = src_r1[0] >> 24 | src_r1[1] << 8; - dst2[3] = src_r1[1] >> 24 | src_r1[2] << 8; - break; - - case 22: - dst1[1] = src_l1[1] | src_r0[0] << 16; - dst1[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst1[3] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[0] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[1] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[2] = src_r1[0] >> 16 | src_r1[1] << 16; - dst2[3] = src_r1[1] >> 16 | src_r1[2] << 16; - break; - - case 23: - dst1[1] = src_l1[1] | src_r0[0] << 24; - dst1[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst1[3] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[0] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[1] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[2] = src_r1[0] >> 8 | src_r1[1] << 24; - dst2[3] = src_r1[1] >> 8 | src_r1[2] << 24; - break; - - case 24: - dst1[2] = src_r1[0]; - dst1[3] = src_r0[1]; - dst2[0] = src_r0[2]; - dst2[1] = src_r0[3]; - dst2[2] = src_r1[0]; - dst2[3] = src_r1[1]; - break; - - case 25: - dst1[2] = src_l1[2] | src_r0[0] << 8; - dst1[3] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[0] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[1] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[2] = src_r0[3] >> 24 | src_r1[0] << 8; - dst2[3] = src_r1[0] >> 24 | src_r1[1] << 8; - break; - - case 26: - dst1[2] = src_l1[2] | src_r0[0] << 16; - dst1[3] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[0] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[1] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[2] = src_r0[3] >> 16 | src_r1[0] << 16; - dst2[3] = src_r1[0] >> 16 | src_r1[1] << 16; - break; - - case 27: - dst1[2] = src_l1[2] | src_r0[0] << 24; - dst1[3] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[0] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[1] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[2] = src_r0[3] >> 8 | src_r1[0] << 24; - dst2[3] = src_r1[0] >> 8 | src_r1[1] 
<< 24; - break; - - case 28: - dst1[3] = src_r1[0]; - dst2[0] = src_r0[1]; - dst2[1] = src_r0[2]; - dst2[2] = src_r0[3]; - dst2[3] = src_r1[0]; - break; - - case 29: - dst1[3] = src_l1[3] | src_r0[0] << 8; - dst2[0] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[1] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[2] = src_r0[2] >> 24 | src_r0[3] << 8; - dst2[3] = src_r0[3] >> 24 | src_r1[0] << 8; - break; - - case 30: - dst1[3] = src_l1[3] | src_r0[0] << 16; - dst2[0] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[1] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[2] = src_r0[2] >> 16 | src_r0[3] << 16; - dst2[3] = src_r0[3] >> 16 | src_r1[0] << 16; - break; - - case 31: - dst1[3] = src_l1[3] | src_r0[0] << 24; - dst2[0] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[1] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[2] = src_r0[2] >> 8 | src_r0[3] << 24; - dst2[3] = src_r0[3] >> 8 | src_r1[0] << 24; - break; - - case 32: - dst2[0] = src_r0[0]; - dst2[1] = src_r0[1]; - dst2[2] = src_r0[2]; - dst2[3] = src_r0[3]; - break; - - case 33: - dst2[0] = src_l2[0] | src_r0[0] << 8; - dst2[1] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[2] = src_r0[1] >> 24 | src_r0[2] << 8; - dst2[3] = src_r0[2] >> 24 | src_r0[3] << 8; - break; - - case 34: - dst2[0] = src_l2[0] | src_r0[0] << 16; - dst2[1] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[2] = src_r0[1] >> 16 | src_r0[2] << 16; - dst2[3] = src_r0[2] >> 16 | src_r0[3] << 16; - break; - - case 35: - dst2[0] = src_l2[0] | src_r0[0] << 24; - dst2[1] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[2] = src_r0[1] >> 8 | src_r0[2] << 24; - dst2[3] = src_r0[2] >> 8 | src_r0[3] << 24; - break; - - case 36: - dst2[1] = src_r0[0]; - dst2[2] = src_r0[1]; - dst2[3] = src_r0[2]; - break; - - case 37: - dst2[1] = src_l2[1] | src_r0[0] << 8; - dst2[2] = src_r0[0] >> 24 | src_r0[1] << 8; - dst2[3] = src_r0[1] >> 24 | src_r0[2] << 8; - break; - - case 38: - dst2[1] = src_l2[1] | src_r0[0] << 16; - dst2[2] = src_r0[0] >> 16 | src_r0[1] << 16; - dst2[3] = src_r0[1] >> 16 | 
src_r0[2] << 16; - break; - - case 39: - dst2[1] = src_l2[1] | src_r0[0] << 24; - dst2[2] = src_r0[0] >> 8 | src_r0[1] << 24; - dst2[3] = src_r0[1] >> 8 | src_r0[2] << 24; - break; - - case 40: - dst2[2] = src_r0[0]; - dst2[3] = src_r0[1]; - break; - - case 41: - dst2[2] = src_l2[2] | src_r0[0] << 8; - dst2[3] = src_r0[0] >> 24 | src_r0[1] << 8; - break; - - case 42: - dst2[2] = src_l2[2] | src_r0[0] << 16; - dst2[3] = src_r0[0] >> 16 | src_r0[1] << 16; - break; - - case 43: - dst2[2] = src_l2[2] | src_r0[0] << 24; - dst2[3] = src_r0[0] >> 8 | src_r0[1] << 24; - break; - - case 44: - dst2[3] = src_r0[0]; - break; - - case 45: - dst2[3] = src_l2[3] | src_r0[0] << 8; - break; - - case 46: - dst2[3] = src_l2[3] | src_r0[0] << 16; - break; - - case 47: - dst2[3] = src_l2[3] | src_r0[0] << 24; - break; - } -} - -__device__ static void memcat16_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | 
append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | 
append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - 
- case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -__device__ static void memcat16_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 
| append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | 
append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | 
append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - } -} - -__device__ static void memcat32_8 (u32x w0[4], u32x w1[4], u32x w2[4], 
u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; - w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - 
w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16; - break; - - case 11: - w0[2] = w0[2] | 
append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; - w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - 
break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = 
append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 
24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -__device__ static void memcat32_9 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 append0[4], const u32 append1[4], const u32 append2[4], const u32 offset) -{ - switch (offset) - { - case 0: - w0[0] = append0[0]; - w0[1] = append0[1]; - w0[2] = append0[2]; - w0[3] = append0[3]; - w1[0] = append1[0]; - w1[1] = append1[1]; - w1[2] = append1[2]; - w1[3] = append1[3]; - w2[0] = append2[0]; - break; - - case 1: - w0[0] = w0[0] | append0[0] << 8; - w0[1] = append0[0] >> 24 | append0[1] << 8; - w0[2] = append0[1] >> 24 | append0[2] << 8; - w0[3] = append0[2] >> 24 | append0[3] << 8; - w1[0] = append0[3] >> 24 | append1[0] << 8; 
- w1[1] = append1[0] >> 24 | append1[1] << 8; - w1[2] = append1[1] >> 24 | append1[2] << 8; - w1[3] = append1[2] >> 24 | append1[3] << 8; - w2[0] = append1[3] >> 24 | append2[0] << 8; - w2[1] = append2[0] >> 24; - break; - - case 2: - w0[0] = w0[0] | append0[0] << 16; - w0[1] = append0[0] >> 16 | append0[1] << 16; - w0[2] = append0[1] >> 16 | append0[2] << 16; - w0[3] = append0[2] >> 16 | append0[3] << 16; - w1[0] = append0[3] >> 16 | append1[0] << 16; - w1[1] = append1[0] >> 16 | append1[1] << 16; - w1[2] = append1[1] >> 16 | append1[2] << 16; - w1[3] = append1[2] >> 16 | append1[3] << 16; - w2[0] = append1[3] >> 16 | append2[0] << 16; - w2[1] = append2[0] >> 16; - break; - - case 3: - w0[0] = w0[0] | append0[0] << 24; - w0[1] = append0[0] >> 8 | append0[1] << 24; - w0[2] = append0[1] >> 8 | append0[2] << 24; - w0[3] = append0[2] >> 8 | append0[3] << 24; - w1[0] = append0[3] >> 8 | append1[0] << 24; - w1[1] = append1[0] >> 8 | append1[1] << 24; - w1[2] = append1[1] >> 8 | append1[2] << 24; - w1[3] = append1[2] >> 8 | append1[3] << 24; - w2[0] = append1[3] >> 8 | append2[0] << 24; - w2[1] = append2[0] >> 8; - break; - - case 4: - w0[1] = append0[0]; - w0[2] = append0[1]; - w0[3] = append0[2]; - w1[0] = append0[3]; - w1[1] = append1[0]; - w1[2] = append1[1]; - w1[3] = append1[2]; - w2[0] = append1[3]; - w2[1] = append2[0]; - break; - - case 5: - w0[1] = w0[1] | append0[0] << 8; - w0[2] = append0[0] >> 24 | append0[1] << 8; - w0[3] = append0[1] >> 24 | append0[2] << 8; - w1[0] = append0[2] >> 24 | append0[3] << 8; - w1[1] = append0[3] >> 24 | append1[0] << 8; - w1[2] = append1[0] >> 24 | append1[1] << 8; - w1[3] = append1[1] >> 24 | append1[2] << 8; - w2[0] = append1[2] >> 24 | append1[3] << 8; - w2[1] = append1[3] >> 24 | append2[0] << 8; - w2[2] = append2[0] >> 24; - break; - - case 6: - w0[1] = w0[1] | append0[0] << 16; - w0[2] = append0[0] >> 16 | append0[1] << 16; - w0[3] = append0[1] >> 16 | append0[2] << 16; - w1[0] = append0[2] >> 16 | append0[3] << 16; - 
w1[1] = append0[3] >> 16 | append1[0] << 16; - w1[2] = append1[0] >> 16 | append1[1] << 16; - w1[3] = append1[1] >> 16 | append1[2] << 16; - w2[0] = append1[2] >> 16 | append1[3] << 16; - w2[1] = append1[3] >> 16 | append2[0] << 16; - w2[2] = append2[0] >> 16; - break; - - case 7: - w0[1] = w0[1] | append0[0] << 24; - w0[2] = append0[0] >> 8 | append0[1] << 24; - w0[3] = append0[1] >> 8 | append0[2] << 24; - w1[0] = append0[2] >> 8 | append0[3] << 24; - w1[1] = append0[3] >> 8 | append1[0] << 24; - w1[2] = append1[0] >> 8 | append1[1] << 24; - w1[3] = append1[1] >> 8 | append1[2] << 24; - w2[0] = append1[2] >> 8 | append1[3] << 24; - w2[1] = append1[3] >> 8 | append2[0] << 24; - w2[2] = append2[0] >> 8; - break; - - case 8: - w0[2] = append0[0]; - w0[3] = append0[1]; - w1[0] = append0[2]; - w1[1] = append0[3]; - w1[2] = append1[0]; - w1[3] = append1[1]; - w2[0] = append1[2]; - w2[1] = append1[3]; - w2[2] = append2[0]; - break; - - case 9: - w0[2] = w0[2] | append0[0] << 8; - w0[3] = append0[0] >> 24 | append0[1] << 8; - w1[0] = append0[1] >> 24 | append0[2] << 8; - w1[1] = append0[2] >> 24 | append0[3] << 8; - w1[2] = append0[3] >> 24 | append1[0] << 8; - w1[3] = append1[0] >> 24 | append1[1] << 8; - w2[0] = append1[1] >> 24 | append1[2] << 8; - w2[1] = append1[2] >> 24 | append1[3] << 8; - w2[2] = append1[3] >> 24 | append2[0] << 8; - w2[3] = append2[0] >> 24; - break; - - case 10: - w0[2] = w0[2] | append0[0] << 16; - w0[3] = append0[0] >> 16 | append0[1] << 16; - w1[0] = append0[1] >> 16 | append0[2] << 16; - w1[1] = append0[2] >> 16 | append0[3] << 16; - w1[2] = append0[3] >> 16 | append1[0] << 16; - w1[3] = append1[0] >> 16 | append1[1] << 16; - w2[0] = append1[1] >> 16 | append1[2] << 16; - w2[1] = append1[2] >> 16 | append1[3] << 16; - w2[2] = append1[3] >> 16 | append2[0] << 16; - w2[3] = append2[0] >> 16; - break; - - case 11: - w0[2] = w0[2] | append0[0] << 24; - w0[3] = append0[0] >> 8 | append0[1] << 24; - w1[0] = append0[1] >> 8 | append0[2] << 24; - 
w1[1] = append0[2] >> 8 | append0[3] << 24; - w1[2] = append0[3] >> 8 | append1[0] << 24; - w1[3] = append1[0] >> 8 | append1[1] << 24; - w2[0] = append1[1] >> 8 | append1[2] << 24; - w2[1] = append1[2] >> 8 | append1[3] << 24; - w2[2] = append1[3] >> 8 | append2[0] << 24; - w2[3] = append2[0] >> 8; - break; - - case 12: - w0[3] = append0[0]; - w1[0] = append0[1]; - w1[1] = append0[2]; - w1[2] = append0[3]; - w1[3] = append1[0]; - w2[0] = append1[1]; - w2[1] = append1[2]; - w2[2] = append1[3]; - w2[3] = append2[0]; - break; - - case 13: - w0[3] = w0[3] | append0[0] << 8; - w1[0] = append0[0] >> 24 | append0[1] << 8; - w1[1] = append0[1] >> 24 | append0[2] << 8; - w1[2] = append0[2] >> 24 | append0[3] << 8; - w1[3] = append0[3] >> 24 | append1[0] << 8; - w2[0] = append1[0] >> 24 | append1[1] << 8; - w2[1] = append1[1] >> 24 | append1[2] << 8; - w2[2] = append1[2] >> 24 | append1[3] << 8; - w2[3] = append1[3] >> 24 | append2[0] << 8; - w3[0] = append2[0] >> 24; - break; - - case 14: - w0[3] = w0[3] | append0[0] << 16; - w1[0] = append0[0] >> 16 | append0[1] << 16; - w1[1] = append0[1] >> 16 | append0[2] << 16; - w1[2] = append0[2] >> 16 | append0[3] << 16; - w1[3] = append0[3] >> 16 | append1[0] << 16; - w2[0] = append1[0] >> 16 | append1[1] << 16; - w2[1] = append1[1] >> 16 | append1[2] << 16; - w2[2] = append1[2] >> 16 | append1[3] << 16; - w2[3] = append1[3] >> 16 | append2[0] << 16; - w3[0] = append2[0] >> 16; - break; - - case 15: - w0[3] = w0[3] | append0[0] << 24; - w1[0] = append0[0] >> 8 | append0[1] << 24; - w1[1] = append0[1] >> 8 | append0[2] << 24; - w1[2] = append0[2] >> 8 | append0[3] << 24; - w1[3] = append0[3] >> 8 | append1[0] << 24; - w2[0] = append1[0] >> 8 | append1[1] << 24; - w2[1] = append1[1] >> 8 | append1[2] << 24; - w2[2] = append1[2] >> 8 | append1[3] << 24; - w2[3] = append1[3] >> 8 | append2[0] << 24; - w3[0] = append2[0] >> 8; - break; - - case 16: - w1[0] = append0[0]; - w1[1] = append0[1]; - w1[2] = append0[2]; - w1[3] = append0[3]; 
- w2[0] = append1[0]; - w2[1] = append1[1]; - w2[2] = append1[2]; - w2[3] = append1[3]; - w3[0] = append2[0]; - break; - - case 17: - w1[0] = w1[0] | append0[0] << 8; - w1[1] = append0[0] >> 24 | append0[1] << 8; - w1[2] = append0[1] >> 24 | append0[2] << 8; - w1[3] = append0[2] >> 24 | append0[3] << 8; - w2[0] = append0[3] >> 24 | append1[0] << 8; - w2[1] = append1[0] >> 24 | append1[1] << 8; - w2[2] = append1[1] >> 24 | append1[2] << 8; - w2[3] = append1[2] >> 24 | append1[3] << 8; - w3[0] = append1[3] >> 24 | append2[0] << 8; - w3[1] = append2[0] >> 24; - break; - - case 18: - w1[0] = w1[0] | append0[0] << 16; - w1[1] = append0[0] >> 16 | append0[1] << 16; - w1[2] = append0[1] >> 16 | append0[2] << 16; - w1[3] = append0[2] >> 16 | append0[3] << 16; - w2[0] = append0[3] >> 16 | append1[0] << 16; - w2[1] = append1[0] >> 16 | append1[1] << 16; - w2[2] = append1[1] >> 16 | append1[2] << 16; - w2[3] = append1[2] >> 16 | append1[3] << 16; - w3[0] = append1[3] >> 16 | append2[0] << 16; - w3[1] = append2[0] >> 16; - break; - - case 19: - w1[0] = w1[0] | append0[0] << 24; - w1[1] = append0[0] >> 8 | append0[1] << 24; - w1[2] = append0[1] >> 8 | append0[2] << 24; - w1[3] = append0[2] >> 8 | append0[3] << 24; - w2[0] = append0[3] >> 8 | append1[0] << 24; - w2[1] = append1[0] >> 8 | append1[1] << 24; - w2[2] = append1[1] >> 8 | append1[2] << 24; - w2[3] = append1[2] >> 8 | append1[3] << 24; - w3[0] = append1[3] >> 8 | append2[0] << 24; - w3[1] = append2[0] >> 8; - break; - - case 20: - w1[1] = append0[0]; - w1[2] = append0[1]; - w1[3] = append0[2]; - w2[0] = append0[3]; - w2[1] = append1[0]; - w2[2] = append1[1]; - w2[3] = append1[2]; - w3[0] = append1[3]; - w3[1] = append2[0]; - break; - - case 21: - w1[1] = w1[1] | append0[0] << 8; - w1[2] = append0[0] >> 24 | append0[1] << 8; - w1[3] = append0[1] >> 24 | append0[2] << 8; - w2[0] = append0[2] >> 24 | append0[3] << 8; - w2[1] = append0[3] >> 24 | append1[0] << 8; - w2[2] = append1[0] >> 24 | append1[1] << 8; - w2[3] = 
append1[1] >> 24 | append1[2] << 8; - w3[0] = append1[2] >> 24 | append1[3] << 8; - w3[1] = append1[3] >> 24 | append2[0] << 8; - break; - - case 22: - w1[1] = w1[1] | append0[0] << 16; - w1[2] = append0[0] >> 16 | append0[1] << 16; - w1[3] = append0[1] >> 16 | append0[2] << 16; - w2[0] = append0[2] >> 16 | append0[3] << 16; - w2[1] = append0[3] >> 16 | append1[0] << 16; - w2[2] = append1[0] >> 16 | append1[1] << 16; - w2[3] = append1[1] >> 16 | append1[2] << 16; - w3[0] = append1[2] >> 16 | append1[3] << 16; - w3[1] = append1[3] >> 16 | append2[0] << 16; - break; - - case 23: - w1[1] = w1[1] | append0[0] << 24; - w1[2] = append0[0] >> 8 | append0[1] << 24; - w1[3] = append0[1] >> 8 | append0[2] << 24; - w2[0] = append0[2] >> 8 | append0[3] << 24; - w2[1] = append0[3] >> 8 | append1[0] << 24; - w2[2] = append1[0] >> 8 | append1[1] << 24; - w2[3] = append1[1] >> 8 | append1[2] << 24; - w3[0] = append1[2] >> 8 | append1[3] << 24; - w3[1] = append1[3] >> 8 | append2[0] << 24; - break; - - case 24: - w1[2] = append0[0]; - w1[3] = append0[1]; - w2[0] = append0[2]; - w2[1] = append0[3]; - w2[2] = append1[0]; - w2[3] = append1[1]; - w3[0] = append1[2]; - w3[1] = append1[3]; - break; - - case 25: - w1[2] = w1[2] | append0[0] << 8; - w1[3] = append0[0] >> 24 | append0[1] << 8; - w2[0] = append0[1] >> 24 | append0[2] << 8; - w2[1] = append0[2] >> 24 | append0[3] << 8; - w2[2] = append0[3] >> 24 | append1[0] << 8; - w2[3] = append1[0] >> 24 | append1[1] << 8; - w3[0] = append1[1] >> 24 | append1[2] << 8; - w3[1] = append1[2] >> 24 | append1[3] << 8; - break; - - case 26: - w1[2] = w1[2] | append0[0] << 16; - w1[3] = append0[0] >> 16 | append0[1] << 16; - w2[0] = append0[1] >> 16 | append0[2] << 16; - w2[1] = append0[2] >> 16 | append0[3] << 16; - w2[2] = append0[3] >> 16 | append1[0] << 16; - w2[3] = append1[0] >> 16 | append1[1] << 16; - w3[0] = append1[1] >> 16 | append1[2] << 16; - w3[1] = append1[2] >> 16 | append1[3] << 16; - break; - - case 27: - w1[2] = w1[2] | 
append0[0] << 24; - w1[3] = append0[0] >> 8 | append0[1] << 24; - w2[0] = append0[1] >> 8 | append0[2] << 24; - w2[1] = append0[2] >> 8 | append0[3] << 24; - w2[2] = append0[3] >> 8 | append1[0] << 24; - w2[3] = append1[0] >> 8 | append1[1] << 24; - w3[0] = append1[1] >> 8 | append1[2] << 24; - w3[1] = append1[2] >> 8 | append1[3] << 24; - break; - - case 28: - w1[3] = append0[0]; - w2[0] = append0[1]; - w2[1] = append0[2]; - w2[2] = append0[3]; - w2[3] = append1[0]; - w3[0] = append1[1]; - w3[1] = append1[2]; - break; - - case 29: - w1[3] = w1[3] | append0[0] << 8; - w2[0] = append0[0] >> 24 | append0[1] << 8; - w2[1] = append0[1] >> 24 | append0[2] << 8; - w2[2] = append0[2] >> 24 | append0[3] << 8; - w2[3] = append0[3] >> 24 | append1[0] << 8; - w3[0] = append1[0] >> 24 | append1[1] << 8; - w3[1] = append1[1] >> 24 | append1[2] << 8; - break; - - case 30: - w1[3] = w1[3] | append0[0] << 16; - w2[0] = append0[0] >> 16 | append0[1] << 16; - w2[1] = append0[1] >> 16 | append0[2] << 16; - w2[2] = append0[2] >> 16 | append0[3] << 16; - w2[3] = append0[3] >> 16 | append1[0] << 16; - w3[0] = append1[0] >> 16 | append1[1] << 16; - w3[1] = append1[1] >> 16 | append1[2] << 16; - break; - - case 31: - w1[3] = w1[3] | append0[0] << 24; - w2[0] = append0[0] >> 8 | append0[1] << 24; - w2[1] = append0[1] >> 8 | append0[2] << 24; - w2[2] = append0[2] >> 8 | append0[3] << 24; - w2[3] = append0[3] >> 8 | append1[0] << 24; - w3[0] = append1[0] >> 8 | append1[1] << 24; - w3[1] = append1[1] >> 8 | append1[2] << 24; - break; - - case 32: - w2[0] = append0[0]; - w2[1] = append0[1]; - w2[2] = append0[2]; - w2[3] = append0[3]; - w3[0] = append1[0]; - w3[1] = append1[1]; - break; - } -} - -__device__ static void switch_buffer_by_offset (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (offset % 4); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: 
- w3[1] = __byte_perm (w3[0], w3[1], selector); - w3[0] = __byte_perm (w2[3], w3[0], selector); - w2[3] = __byte_perm (w2[2], w2[3], selector); - w2[2] = __byte_perm (w2[1], w2[2], selector); - w2[1] = __byte_perm (w2[0], w2[1], selector); - w2[0] = __byte_perm (w1[3], w2[0], selector); - w1[3] = __byte_perm (w1[2], w1[3], selector); - w1[2] = __byte_perm (w1[1], w1[2], selector); - w1[1] = __byte_perm (w1[0], w1[1], selector); - w1[0] = __byte_perm (w0[3], w1[0], selector); - w0[3] = __byte_perm (w0[2], w0[3], selector); - w0[2] = __byte_perm (w0[1], w0[2], selector); - w0[1] = __byte_perm (w0[0], w0[1], selector); - w0[0] = __byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[1] = __byte_perm (w2[3], w3[0], selector); - w3[0] = __byte_perm (w2[2], w2[3], selector); - w2[3] = __byte_perm (w2[1], w2[2], selector); - w2[2] = __byte_perm (w2[0], w2[1], selector); - w2[1] = __byte_perm (w1[3], w2[0], selector); - w2[0] = __byte_perm (w1[2], w1[3], selector); - w1[3] = __byte_perm (w1[1], w1[2], selector); - w1[2] = __byte_perm (w1[0], w1[1], selector); - w1[1] = __byte_perm (w0[3], w1[0], selector); - w1[0] = __byte_perm (w0[2], w0[3], selector); - w0[3] = __byte_perm (w0[1], w0[2], selector); - w0[2] = __byte_perm (w0[0], w0[1], selector); - w0[1] = __byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[1] = __byte_perm (w2[2], w2[3], selector); - w3[0] = __byte_perm (w2[1], w2[2], selector); - w2[3] = __byte_perm (w2[0], w2[1], selector); - w2[2] = __byte_perm (w1[3], w2[0], selector); - w2[1] = __byte_perm (w1[2], w1[3], selector); - w2[0] = __byte_perm (w1[1], w1[2], selector); - w1[3] = __byte_perm (w1[0], w1[1], selector); - w1[2] = __byte_perm (w0[3], w1[0], selector); - w1[1] = __byte_perm (w0[2], w0[3], selector); - w1[0] = __byte_perm (w0[1], w0[2], selector); - w0[3] = __byte_perm (w0[0], w0[1], selector); - w0[2] = __byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[1] = __byte_perm 
(w2[1], w2[2], selector); - w3[0] = __byte_perm (w2[0], w2[1], selector); - w2[3] = __byte_perm (w1[3], w2[0], selector); - w2[2] = __byte_perm (w1[2], w1[3], selector); - w2[1] = __byte_perm (w1[1], w1[2], selector); - w2[0] = __byte_perm (w1[0], w1[1], selector); - w1[3] = __byte_perm (w0[3], w1[0], selector); - w1[2] = __byte_perm (w0[2], w0[3], selector); - w1[1] = __byte_perm (w0[1], w0[2], selector); - w1[0] = __byte_perm (w0[0], w0[1], selector); - w0[3] = __byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[1] = __byte_perm (w2[0], w2[1], selector); - w3[0] = __byte_perm (w1[3], w2[0], selector); - w2[3] = __byte_perm (w1[2], w1[3], selector); - w2[2] = __byte_perm (w1[1], w1[2], selector); - w2[1] = __byte_perm (w1[0], w1[1], selector); - w2[0] = __byte_perm (w0[3], w1[0], selector); - w1[3] = __byte_perm (w0[2], w0[3], selector); - w1[2] = __byte_perm (w0[1], w0[2], selector); - w1[1] = __byte_perm (w0[0], w0[1], selector); - w1[0] = __byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[1] = __byte_perm (w1[3], w2[0], selector); - w3[0] = __byte_perm (w1[2], w1[3], selector); - w2[3] = __byte_perm (w1[1], w1[2], selector); - w2[2] = __byte_perm (w1[0], w1[1], selector); - w2[1] = __byte_perm (w0[3], w1[0], selector); - w2[0] = __byte_perm (w0[2], w0[3], selector); - w1[3] = __byte_perm (w0[1], w0[2], selector); - w1[2] = __byte_perm (w0[0], w0[1], selector); - w1[1] = __byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[1] = __byte_perm (w1[2], w1[3], selector); - w3[0] = __byte_perm (w1[1], w1[2], selector); - w2[3] = __byte_perm (w1[0], w1[1], selector); - w2[2] = __byte_perm (w0[3], w1[0], selector); - w2[1] = __byte_perm (w0[2], w0[3], selector); - w2[0] = __byte_perm (w0[1], w0[2], selector); - w1[3] = __byte_perm (w0[0], w0[1], selector); - w1[2] = __byte_perm ( 0, 
w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[1] = __byte_perm (w1[1], w1[2], selector); - w3[0] = __byte_perm (w1[0], w1[1], selector); - w2[3] = __byte_perm (w0[3], w1[0], selector); - w2[2] = __byte_perm (w0[2], w0[3], selector); - w2[1] = __byte_perm (w0[1], w0[2], selector); - w2[0] = __byte_perm (w0[0], w0[1], selector); - w1[3] = __byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[1] = __byte_perm (w1[0], w1[1], selector); - w3[0] = __byte_perm (w0[3], w1[0], selector); - w2[3] = __byte_perm (w0[2], w0[3], selector); - w2[2] = __byte_perm (w0[1], w0[2], selector); - w2[1] = __byte_perm (w0[0], w0[1], selector); - w2[0] = __byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[1] = __byte_perm (w0[3], w1[0], selector); - w3[0] = __byte_perm (w0[2], w0[3], selector); - w2[3] = __byte_perm (w0[1], w0[2], selector); - w2[2] = __byte_perm (w0[0], w0[1], selector); - w2[1] = __byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[1] = __byte_perm (w0[2], w0[3], selector); - w3[0] = __byte_perm (w0[1], w0[2], selector); - w2[3] = __byte_perm (w0[0], w0[1], selector); - w2[2] = __byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[1] = __byte_perm (w0[1], w0[2], selector); - w3[0] = __byte_perm (w0[0], w0[1], selector); - w2[3] = __byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - 
case 12: - w3[1] = __byte_perm (w0[0], w0[1], selector); - w3[0] = __byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[1] = __byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - - #else - - u32x tmp0[4]; - u32x tmp1[4]; - u32x tmp2[1]; - - switch (offset % 4) - { - case 0: - tmp0[0] = w0[0]; - tmp0[1] = w0[1]; - tmp0[2] = w0[2]; - tmp0[3] = w0[3]; - tmp1[0] = w1[0]; - tmp1[1] = w1[1]; - tmp1[2] = w1[2]; - tmp1[3] = w1[3]; - tmp2[0] = 0; - break; - - case 1: - tmp0[0] = w0[0] << 8; - tmp0[1] = w0[0] >> 24 | w0[1] << 8; - tmp0[2] = w0[1] >> 24 | w0[2] << 8; - tmp0[3] = w0[2] >> 24 | w0[3] << 8; - tmp1[0] = w0[3] >> 24 | w1[0] << 8; - tmp1[1] = w1[0] >> 24 | w1[1] << 8; - tmp1[2] = w1[1] >> 24 | w1[2] << 8; - tmp1[3] = w1[2] >> 24 | w1[3] << 8; - tmp2[0] = w1[3] >> 24; - break; - - case 2: - tmp0[0] = w0[0] << 16; - tmp0[1] = w0[0] >> 16 | w0[1] << 16; - tmp0[2] = w0[1] >> 16 | w0[2] << 16; - tmp0[3] = w0[2] >> 16 | w0[3] << 16; - tmp1[0] = w0[3] >> 16 | w1[0] << 16; - tmp1[1] = w1[0] >> 16 | w1[1] << 16; - tmp1[2] = w1[1] >> 16 | w1[2] << 16; - tmp1[3] = w1[2] >> 16 | w1[3] << 16; - tmp2[0] = w1[3] >> 16; - break; - - case 3: - tmp0[0] = w0[0] << 24; - tmp0[1] = w0[0] >> 8 | w0[1] << 24; - tmp0[2] = w0[1] >> 8 | w0[2] << 24; - tmp0[3] = w0[2] >> 8 | w0[3] << 24; - tmp1[0] = w0[3] >> 8 | w1[0] << 24; - tmp1[1] = w1[0] >> 8 | w1[1] << 24; - tmp1[2] = w1[1] >> 8 | w1[2] << 24; - tmp1[3] = w1[2] >> 8 | w1[3] << 24; - tmp2[0] = w1[3] >> 8; - break; - } - - switch (offset / 4) - { - case 0: - w0[0] = tmp0[0]; - w0[1] = tmp0[1]; - w0[2] = tmp0[2]; - w0[3] = tmp0[3]; - w1[0] = tmp1[0]; - w1[1] = tmp1[1]; - w1[2] = tmp1[2]; - w1[3] = tmp1[3]; - 
w2[0] = tmp2[0]; - break; - - case 1: - w0[0] = 0; - w0[1] = tmp0[0]; - w0[2] = tmp0[1]; - w0[3] = tmp0[2]; - w1[0] = tmp0[3]; - w1[1] = tmp1[0]; - w1[2] = tmp1[1]; - w1[3] = tmp1[2]; - w2[0] = tmp1[3]; - w2[1] = tmp2[0]; - break; - - case 2: - w0[0] = 0; - w0[1] = 0; - w0[2] = tmp0[0]; - w0[3] = tmp0[1]; - w1[0] = tmp0[2]; - w1[1] = tmp0[3]; - w1[2] = tmp1[0]; - w1[3] = tmp1[1]; - w2[0] = tmp1[2]; - w2[1] = tmp1[3]; - w2[2] = tmp2[0]; - break; - - case 3: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = tmp0[0]; - w1[0] = tmp0[1]; - w1[1] = tmp0[2]; - w1[2] = tmp0[3]; - w1[3] = tmp1[0]; - w2[0] = tmp1[1]; - w2[1] = tmp1[2]; - w2[2] = tmp1[3]; - w2[3] = tmp2[0]; - break; - - case 4: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = tmp0[0]; - w1[1] = tmp0[1]; - w1[2] = tmp0[2]; - w1[3] = tmp0[3]; - w2[0] = tmp1[0]; - w2[1] = tmp1[1]; - w2[2] = tmp1[2]; - w2[3] = tmp1[3]; - w3[0] = tmp2[0]; - break; - - case 5: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = tmp0[0]; - w1[2] = tmp0[1]; - w1[3] = tmp0[2]; - w2[0] = tmp0[3]; - w2[1] = tmp1[0]; - w2[2] = tmp1[1]; - w2[3] = tmp1[2]; - w3[0] = tmp1[3]; - w3[1] = tmp2[0]; - break; - - case 6: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = tmp0[0]; - w1[3] = tmp0[1]; - w2[0] = tmp0[2]; - w2[1] = tmp0[3]; - w2[2] = tmp1[0]; - w2[3] = tmp1[1]; - w3[0] = tmp1[2]; - w3[1] = tmp1[3]; - w3[2] = tmp2[0]; - break; - - case 7: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = tmp0[0]; - w2[0] = tmp0[1]; - w2[1] = tmp0[2]; - w2[2] = tmp0[3]; - w2[3] = tmp1[0]; - w3[0] = tmp1[1]; - w3[1] = tmp1[2]; - w3[2] = tmp1[3]; - w3[3] = tmp2[0]; - break; - - case 8: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = tmp0[0]; - w2[1] = tmp0[1]; - w2[2] = tmp0[2]; - w2[3] = tmp0[3]; - w3[0] = tmp1[0]; - w3[1] = tmp1[1]; - w3[2] = tmp1[2]; - w3[3] = 
tmp1[3]; - break; - - case 9: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = tmp0[0]; - w2[2] = tmp0[1]; - w2[3] = tmp0[2]; - w3[0] = tmp0[3]; - w3[1] = tmp1[0]; - w3[2] = tmp1[1]; - w3[3] = tmp1[2]; - break; - - case 10: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = tmp0[0]; - w2[3] = tmp0[1]; - w3[0] = tmp0[2]; - w3[1] = tmp0[3]; - w3[2] = tmp1[0]; - w3[3] = tmp1[1]; - break; - - case 11: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = tmp0[0]; - w3[0] = tmp0[1]; - w3[1] = tmp0[2]; - w3[2] = tmp0[3]; - w3[3] = tmp1[0]; - break; - - case 12: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = tmp0[0]; - w3[1] = tmp0[1]; - w3[2] = tmp0[2]; - w3[3] = tmp0[3]; - break; - - case 13: - w0[0] = 0; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = tmp0[0]; - w3[2] = tmp0[1]; - w3[3] = tmp0[2]; - break; - - } - - #endif -} - -__device__ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) -{ - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - - switch (offset / 4) - { - case 0: - w3[1] = __byte_perm (w3[1], w3[0], selector); - w3[0] = __byte_perm (w3[0], w2[3], selector); - w2[3] = __byte_perm (w2[3], w2[2], selector); - w2[2] = __byte_perm (w2[2], w2[1], selector); - w2[1] = __byte_perm (w2[1], w2[0], selector); - w2[0] = __byte_perm (w2[0], w1[3], selector); - w1[3] = __byte_perm (w1[3], w1[2], selector); - w1[2] = __byte_perm (w1[2], w1[1], selector); - w1[1] = __byte_perm (w1[1], w1[0], selector); - 
w1[0] = __byte_perm (w1[0], w0[3], selector); - w0[3] = __byte_perm (w0[3], w0[2], selector); - w0[2] = __byte_perm (w0[2], w0[1], selector); - w0[1] = __byte_perm (w0[1], w0[0], selector); - w0[0] = __byte_perm (w0[0], 0, selector); - break; - - case 1: - w3[1] = __byte_perm (w3[0], w2[3], selector); - w3[0] = __byte_perm (w2[3], w2[2], selector); - w2[3] = __byte_perm (w2[2], w2[1], selector); - w2[2] = __byte_perm (w2[1], w2[0], selector); - w2[1] = __byte_perm (w2[0], w1[3], selector); - w2[0] = __byte_perm (w1[3], w1[2], selector); - w1[3] = __byte_perm (w1[2], w1[1], selector); - w1[2] = __byte_perm (w1[1], w1[0], selector); - w1[1] = __byte_perm (w1[0], w0[3], selector); - w1[0] = __byte_perm (w0[3], w0[2], selector); - w0[3] = __byte_perm (w0[2], w0[1], selector); - w0[2] = __byte_perm (w0[1], w0[0], selector); - w0[1] = __byte_perm (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w3[1] = __byte_perm (w2[3], w2[2], selector); - w3[0] = __byte_perm (w2[2], w2[1], selector); - w2[3] = __byte_perm (w2[1], w2[0], selector); - w2[2] = __byte_perm (w2[0], w1[3], selector); - w2[1] = __byte_perm (w1[3], w1[2], selector); - w2[0] = __byte_perm (w1[2], w1[1], selector); - w1[3] = __byte_perm (w1[1], w1[0], selector); - w1[2] = __byte_perm (w1[0], w0[3], selector); - w1[1] = __byte_perm (w0[3], w0[2], selector); - w1[0] = __byte_perm (w0[2], w0[1], selector); - w0[3] = __byte_perm (w0[1], w0[0], selector); - w0[2] = __byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w3[1] = __byte_perm (w2[2], w2[1], selector); - w3[0] = __byte_perm (w2[1], w2[0], selector); - w2[3] = __byte_perm (w2[0], w1[3], selector); - w2[2] = __byte_perm (w1[3], w1[2], selector); - w2[1] = __byte_perm (w1[2], w1[1], selector); - w2[0] = __byte_perm (w1[1], w1[0], selector); - w1[3] = __byte_perm (w1[0], w0[3], selector); - w1[2] = __byte_perm (w0[3], w0[2], selector); - w1[1] = __byte_perm (w0[2], w0[1], selector); - w1[0] = __byte_perm (w0[1], w0[0], 
selector); - w0[3] = __byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w3[1] = __byte_perm (w2[1], w2[0], selector); - w3[0] = __byte_perm (w2[0], w1[3], selector); - w2[3] = __byte_perm (w1[3], w1[2], selector); - w2[2] = __byte_perm (w1[2], w1[1], selector); - w2[1] = __byte_perm (w1[1], w1[0], selector); - w2[0] = __byte_perm (w1[0], w0[3], selector); - w1[3] = __byte_perm (w0[3], w0[2], selector); - w1[2] = __byte_perm (w0[2], w0[1], selector); - w1[1] = __byte_perm (w0[1], w0[0], selector); - w1[0] = __byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w3[1] = __byte_perm (w2[0], w1[3], selector); - w3[0] = __byte_perm (w1[3], w1[2], selector); - w2[3] = __byte_perm (w1[2], w1[1], selector); - w2[2] = __byte_perm (w1[1], w1[0], selector); - w2[1] = __byte_perm (w1[0], w0[3], selector); - w2[0] = __byte_perm (w0[3], w0[2], selector); - w1[3] = __byte_perm (w0[2], w0[1], selector); - w1[2] = __byte_perm (w0[1], w0[0], selector); - w1[1] = __byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w3[1] = __byte_perm (w1[3], w1[2], selector); - w3[0] = __byte_perm (w1[2], w1[1], selector); - w2[3] = __byte_perm (w1[1], w1[0], selector); - w2[2] = __byte_perm (w1[0], w0[3], selector); - w2[1] = __byte_perm (w0[3], w0[2], selector); - w2[0] = __byte_perm (w0[2], w0[1], selector); - w1[3] = __byte_perm (w0[1], w0[0], selector); - w1[2] = __byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w3[1] = __byte_perm (w1[2], w1[1], selector); - w3[0] = __byte_perm (w1[1], w1[0], selector); - w2[3] = __byte_perm (w1[0], w0[3], selector); - w2[2] = __byte_perm (w0[3], w0[2], selector); - w2[1] = __byte_perm (w0[2], w0[1], selector); - w2[0] = __byte_perm (w0[1], w0[0], selector); - w1[3] = __byte_perm (w0[0], 0, selector); - w1[2] = 
0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w3[1] = __byte_perm (w1[1], w1[0], selector); - w3[0] = __byte_perm (w1[0], w0[3], selector); - w2[3] = __byte_perm (w0[3], w0[2], selector); - w2[2] = __byte_perm (w0[2], w0[1], selector); - w2[1] = __byte_perm (w0[1], w0[0], selector); - w2[0] = __byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w3[1] = __byte_perm (w1[0], w0[3], selector); - w3[0] = __byte_perm (w0[3], w0[2], selector); - w2[3] = __byte_perm (w0[2], w0[1], selector); - w2[2] = __byte_perm (w0[1], w0[0], selector); - w2[1] = __byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w3[1] = __byte_perm (w0[3], w0[2], selector); - w3[0] = __byte_perm (w0[2], w0[1], selector); - w2[3] = __byte_perm (w0[1], w0[0], selector); - w2[2] = __byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w3[1] = __byte_perm (w0[2], w0[1], selector); - w3[0] = __byte_perm (w0[1], w0[0], selector); - w2[3] = __byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w3[1] = __byte_perm (w0[1], w0[0], selector); - w3[0] = __byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w3[1] = __byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 
0; - w0[0] = 0; - break; - } -} - -#endif - -__device__ static u32 check_vector_accessible (const u32 il_pos, const u32 bf_loops, const u32 bfs_cnt, const u32 element) -{ - #ifdef VECT_SIZE1 - - // nothing to do here - - #else - - if ((il_pos + 1) == bf_loops) - { - #ifdef VECT_SIZE2 - u32 bfs_over = bfs_cnt % 2; - - if (bfs_over == 0) bfs_over = 2; - #endif - - #ifdef VECT_SIZE4 - u32 bfs_over = bfs_cnt % 4; - - if (bfs_over == 0) bfs_over = 4; - #endif - - if (element >= bfs_over) return 0; - } - - #endif - - return 1; -} diff --git a/nv/gpu_aes256_nv.c b/nv/gpu_aes256_nv.c deleted file mode 100644 index 8011899..0000000 --- a/nv/gpu_aes256_nv.c +++ /dev/null @@ -1,1048 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, 
-}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, 
- 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, 
- 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, 
- 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, 
- 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, 
- 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, 
- 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 
- 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 
- 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 
- 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, 
- 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 
- 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, 
- 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, 
-}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, 
- 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void aes256_ExpandKey (u32 *ks, const u32 *ukey) -{ - ks[0] = ukey[0]; - ks[1] = ukey[1]; - ks[2] = ukey[2]; - ks[3] = ukey[3]; - ks[4] = ukey[4]; - ks[5] = ukey[5]; - ks[6] = ukey[6]; - ks[7] = ukey[7]; - - int i; - int j; - - i = 0; - j = 0; - - while (1) - { - u32 temp = ks[j + 7]; - - ks[j + 8] = ks[j + 0] - ^ (te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - ks[j + 9] = ks[j + 1] ^ ks[j + 8]; - ks[j + 10] 
= ks[j + 2] ^ ks[j + 9]; - ks[j + 11] = ks[j + 3] ^ ks[j + 10]; - - if (++i == 7) break; - - temp = ks[j + 11]; - - ks[j + 12] = ks[j + 4] - ^ (te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (te1[(temp >> 0) & 0xff] & 0x000000ff); - - ks[j + 13] = ks[j + 5] ^ ks[j + 12]; - ks[j + 14] = ks[j + 6] ^ ks[j + 13]; - ks[j + 15] = ks[j + 7] ^ ks[j + 14]; - - j += 8; - } -} - -__device__ static void aes256_InvertKey (u32 *ks) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = ks[i + 0]; ks[i + 0] = ks[j + 0]; ks[j + 0] = temp; - temp = ks[i + 1]; ks[i + 1] = ks[j + 1]; ks[j + 1] = temp; - temp = ks[i + 2]; ks[i + 2] = ks[j + 2]; ks[j + 2] = temp; - temp = ks[i + 3]; ks[i + 3] = ks[j + 3]; ks[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - ks[j + 0] = - td0[te1[(ks[j + 0] >> 24) & 0xff] & 0xff] ^ - td1[te1[(ks[j + 0] >> 16) & 0xff] & 0xff] ^ - td2[te1[(ks[j + 0] >> 8) & 0xff] & 0xff] ^ - td3[te1[(ks[j + 0] >> 0) & 0xff] & 0xff]; - - ks[j + 1] = - td0[te1[(ks[j + 1] >> 24) & 0xff] & 0xff] ^ - td1[te1[(ks[j + 1] >> 16) & 0xff] & 0xff] ^ - td2[te1[(ks[j + 1] >> 8) & 0xff] & 0xff] ^ - td3[te1[(ks[j + 1] >> 0) & 0xff] & 0xff]; - - ks[j + 2] = - td0[te1[(ks[j + 2] >> 24) & 0xff] & 0xff] ^ - td1[te1[(ks[j + 2] >> 16) & 0xff] & 0xff] ^ - td2[te1[(ks[j + 2] >> 8) & 0xff] & 0xff] ^ - td3[te1[(ks[j + 2] >> 0) & 0xff] & 0xff]; - - ks[j + 3] = - td0[te1[(ks[j + 3] >> 24) & 0xff] & 0xff] ^ - td1[te1[(ks[j + 3] >> 16) & 0xff] & 0xff] ^ - td2[te1[(ks[j + 3] >> 8) & 0xff] & 0xff] ^ - td3[te1[(ks[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void aes256_set_encrypt_key (u32 *ks, const u32 *ukey) -{ - u32 ukey_s[8]; - - ukey_s[0] = swap_workaround (ukey[0]); - ukey_s[1] = swap_workaround (ukey[1]); - ukey_s[2] = swap_workaround (ukey[2]); - ukey_s[3] = swap_workaround (ukey[3]); - ukey_s[4] = swap_workaround (ukey[4]); - ukey_s[5] = 
swap_workaround (ukey[5]); - ukey_s[6] = swap_workaround (ukey[6]); - ukey_s[7] = swap_workaround (ukey[7]); - - aes256_ExpandKey (ks, ukey_s); -} - -__device__ static void aes256_set_decrypt_key (u32 *ks, const u32 *ukey) -{ - u32 ukey_s[8]; - - ukey_s[0] = swap_workaround (ukey[0]); - ukey_s[1] = swap_workaround (ukey[1]); - ukey_s[2] = swap_workaround (ukey[2]); - ukey_s[3] = swap_workaround (ukey[3]); - ukey_s[4] = swap_workaround (ukey[4]); - ukey_s[5] = swap_workaround (ukey[5]); - ukey_s[6] = swap_workaround (ukey[6]); - ukey_s[7] = swap_workaround (ukey[7]); - - aes256_ExpandKey (ks, ukey_s); - - aes256_InvertKey (ks); -} - -__device__ static void aes256_decrypt (const u32 *ks, const u32 *in, u32 *out) -{ - u32 in_s[4]; - - in_s[0] = swap_workaround (in[0]); - in_s[1] = swap_workaround (in[1]); - in_s[2] = swap_workaround (in[2]); - in_s[3] = swap_workaround (in[3]); - - u32 s0 = in_s[0] ^ ks[0]; - u32 s1 = in_s[1] ^ ks[1]; - u32 s2 = in_s[2] ^ ks[2]; - u32 s3 = in_s[3] ^ ks[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[ 4]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[ 5]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[ 6]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[ 7]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[ 8]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[ 9]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[10]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[11]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[12]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 
0xff] ^ ks[13]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[14]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[15]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[16]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[17]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[18]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[19]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[20]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[21]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[22]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[23]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[24]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[25]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[26]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[27]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[28]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[29]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[30]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[31]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[32]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[33]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 
0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[34]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[35]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[36]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[37]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[38]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[39]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[40]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[41]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[42]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[43]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[44]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[45]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[46]; - t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[47]; - s0 = td0[t0 >> 24] ^ td1[(t3 >> 16) & 0xff] ^ td2[(t2 >> 8) & 0xff] ^ td3[t1 & 0xff] ^ ks[48]; - s1 = td0[t1 >> 24] ^ td1[(t0 >> 16) & 0xff] ^ td2[(t3 >> 8) & 0xff] ^ td3[t2 & 0xff] ^ ks[49]; - s2 = td0[t2 >> 24] ^ td1[(t1 >> 16) & 0xff] ^ td2[(t0 >> 8) & 0xff] ^ td3[t3 & 0xff] ^ ks[50]; - s3 = td0[t3 >> 24] ^ td1[(t2 >> 16) & 0xff] ^ td2[(t1 >> 8) & 0xff] ^ td3[t0 & 0xff] ^ ks[51]; - t0 = td0[s0 >> 24] ^ td1[(s3 >> 16) & 0xff] ^ td2[(s2 >> 8) & 0xff] ^ td3[s1 & 0xff] ^ ks[52]; - t1 = td0[s1 >> 24] ^ td1[(s0 >> 16) & 0xff] ^ td2[(s3 >> 8) & 0xff] ^ td3[s2 & 0xff] ^ ks[53]; - t2 = td0[s2 >> 24] ^ td1[(s1 >> 16) & 0xff] ^ td2[(s0 >> 8) & 0xff] ^ td3[s3 & 0xff] ^ ks[54]; - 
t3 = td0[s3 >> 24] ^ td1[(s2 >> 16) & 0xff] ^ td2[(s1 >> 8) & 0xff] ^ td3[s0 & 0xff] ^ ks[55]; - - out[0] = (td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ ks[56]; - - out[1] = (td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ ks[57]; - - out[2] = (td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ ks[58]; - - out[3] = (td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ ks[59]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); -} - -__device__ static void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out) -{ - u32 in_s[4]; - - in_s[0] = swap_workaround (in[0]); - in_s[1] = swap_workaround (in[1]); - in_s[2] = swap_workaround (in[2]); - in_s[3] = swap_workaround (in[3]); - - u32 s0 = in_s[0] ^ ks[0]; - u32 s1 = in_s[1] ^ ks[1]; - u32 s2 = in_s[2] ^ ks[2]; - u32 s3 = in_s[3] ^ ks[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[ 4]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[ 5]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[ 6]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[ 7]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[ 8]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[ 9]; - s2 = te0[t2 
>> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[10]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[11]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[12]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[13]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[14]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[15]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[16]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[17]; - s2 = te0[t2 >> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[18]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[19]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[20]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[21]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[22]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[23]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[24]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[25]; - s2 = te0[t2 >> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[26]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[27]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[28]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[29]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ 
te3[s1 & 0xff] ^ ks[30]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[31]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[32]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[33]; - s2 = te0[t2 >> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[34]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[35]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[36]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[37]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[38]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[39]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[40]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[41]; - s2 = te0[t2 >> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[42]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[43]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[44]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[45]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[46]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[47]; - s0 = te0[t0 >> 24] ^ te1[(t1 >> 16) & 0xff] ^ te2[(t2 >> 8) & 0xff] ^ te3[t3 & 0xff] ^ ks[48]; - s1 = te0[t1 >> 24] ^ te1[(t2 >> 16) & 0xff] ^ te2[(t3 >> 8) & 0xff] ^ te3[t0 & 0xff] ^ ks[49]; - s2 = te0[t2 >> 24] ^ te1[(t3 >> 16) & 0xff] ^ te2[(t0 >> 8) & 0xff] ^ te3[t1 & 0xff] ^ ks[50]; - s3 = te0[t3 >> 24] ^ te1[(t0 >> 
16) & 0xff] ^ te2[(t1 >> 8) & 0xff] ^ te3[t2 & 0xff] ^ ks[51]; - t0 = te0[s0 >> 24] ^ te1[(s1 >> 16) & 0xff] ^ te2[(s2 >> 8) & 0xff] ^ te3[s3 & 0xff] ^ ks[52]; - t1 = te0[s1 >> 24] ^ te1[(s2 >> 16) & 0xff] ^ te2[(s3 >> 8) & 0xff] ^ te3[s0 & 0xff] ^ ks[53]; - t2 = te0[s2 >> 24] ^ te1[(s3 >> 16) & 0xff] ^ te2[(s0 >> 8) & 0xff] ^ te3[s1 & 0xff] ^ ks[54]; - t3 = te0[s3 >> 24] ^ te1[(s0 >> 16) & 0xff] ^ te2[(s1 >> 8) & 0xff] ^ te3[s2 & 0xff] ^ ks[55]; - - out[0] = (te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ ks[56]; - - out[1] = (te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ ks[57]; - - out[2] = (te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ ks[58]; - - out[3] = (te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ ks[59]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); -} - -__device__ static void aes256_decrypt_xts (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out) -{ - u32 T[4] = { 0 }; - u32 Z[4] = { 0 }; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - - u32 ks[60]; - - aes256_set_encrypt_key (ks, ukey2); - aes256_encrypt (ks, Z, T); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; - - aes256_set_decrypt_key (ks, ukey1); - aes256_decrypt (ks, out, out); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; -} diff --git a/nv/gpu_serpent256_nv.c b/nv/gpu_serpent256_nv.c deleted file mode 100644 index 8f2d01e..0000000 --- 
a/nv/gpu_serpent256_nv.c +++ /dev/null @@ -1,583 +0,0 @@ -/* This is an independent implementation of the encryption algorithm: */ -/* */ -/* Serpent by Ross Anderson, Eli Biham and Lars Knudsen */ -/* */ -/* which is a candidate algorithm in the Advanced Encryption Standard */ -/* programme of the US National Institute of Standards and Technology. */ -/* */ -/* Copyright in this implementation is held by Dr B R Gladman but I */ -/* hereby give permission for its free direct or derivative use subject */ -/* to acknowledgment of its origin and compliance with any conditions */ -/* that the originators of the algorithm place on its exploitation. */ -/* */ -/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */ -/* */ -/* -------------------------------------------------------------------- */ -/* */ -/* Cleaned and optimized for GPU use with oclHashcat by Jens Steube */ - -/* 15 terms */ - -#define sb0(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = a & d; \ - t3 = c ^ t1; \ - t6 = b & t1; \ - t4 = b ^ t3; \ - t10 = ~t3; \ - h = t2 ^ t4; \ - t7 = a ^ t6; \ - t14 = ~t7; \ - t8 = c | t7; \ - t11 = t3 ^ t7; \ - g = t4 ^ t8; \ - t12 = h & t11; \ - f = t10 ^ t12; \ - e = t12 ^ t14 - -/* 15 terms */ - -#define ib0(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = t1 | t2; \ - t4 = d ^ t3; \ - t7 = d & t2; \ - t5 = c ^ t4; \ - t8 = t1 ^ t7; \ - g = t2 ^ t5; \ - t11 = a & t4; \ - t9 = g & t8; \ - t14 = t5 ^ t8; \ - f = t4 ^ t9; \ - t12 = t5 | f; \ - h = t11 ^ t12; \ - e = h ^ t14 - -/* 14 terms! 
*/ - -#define sb1(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = b ^ t1; \ - t3 = a | t2; \ - t4 = d | t2; \ - t5 = c ^ t3; \ - g = d ^ t5; \ - t7 = b ^ t4; \ - t8 = t2 ^ g; \ - t9 = t5 & t7; \ - h = t8 ^ t9; \ - t11 = t5 ^ t7; \ - f = h ^ t11; \ - t13 = t8 & t11; \ - e = t5 ^ t13 - -/* 17 terms */ - -#define ib1(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = a & b; \ - t3 = b ^ c; \ - t4 = a ^ t3; \ - t5 = b | d; \ - t7 = c | t1; \ - h = t4 ^ t5; \ - t8 = b ^ t7; \ - t11 = ~t2; \ - t9 = t4 & t8; \ - f = t1 ^ t9; \ - t13 = t9 ^ t11; \ - t12 = h & f; \ - g = t12 ^ t13; \ - t15 = a & d; \ - t16 = c ^ t13; \ - e = t15 ^ t16 - -/* 16 terms */ - -#define sb2(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = b ^ d; \ - t3 = c & t1; \ - t13 = d | t1; \ - e = t2 ^ t3; \ - t5 = c ^ t1; \ - t6 = c ^ e; \ - t7 = b & t6; \ - t10 = e | t5; \ - h = t5 ^ t7; \ - t9 = d | t7; \ - t11 = t9 & t10; \ - t14 = t2 ^ h; \ - g = a ^ t11; \ - t15 = g ^ t13; \ - f = t14 ^ t15 - -/* 16 terms */ - -#define ib2(a,b,c,d,e,f,g,h) \ - t1 = b ^ d; \ - t2 = ~t1; \ - t3 = a ^ c; \ - t4 = c ^ t1; \ - t7 = a | t2; \ - t5 = b & t4; \ - t8 = d ^ t7; \ - t11 = ~t4; \ - e = t3 ^ t5; \ - t9 = t3 | t8; \ - t14 = d & t11; \ - h = t1 ^ t9; \ - t12 = e | h; \ - f = t11 ^ t12; \ - t15 = t3 ^ t12; \ - g = t14 ^ t15 - -/* 17 terms */ - -#define sb3(a,b,c,d,e,f,g,h) \ - t1 = a ^ c; \ - t2 = d ^ t1; \ - t3 = a & t2; \ - t4 = d ^ t3; \ - t5 = b & t4; \ - g = t2 ^ t5; \ - t7 = a | g; \ - t8 = b | d; \ - t11 = a | d; \ - t9 = t4 & t7; \ - f = t8 ^ t9; \ - t12 = b ^ t11; \ - t13 = g ^ t9; \ - t15 = t3 ^ t8; \ - h = t12 ^ t13; \ - t16 = c & t15; \ - e = t12 ^ t16 - -/* 16 term solution that performs less well than 17 term one - in my environment (PPro/PII) - -#define sb3(a,b,c,d,e,f,g,h) \ - t1 = a ^ b; \ - t2 = a & c; \ - t3 = a | d; \ - t4 = c ^ d; \ - t5 = t1 & t3; \ - t6 = t2 | t5; \ - g = t4 ^ t6; \ - t8 = b ^ t3; \ - t9 = t6 ^ t8; \ - t10 = t4 & t9; \ - e = t1 ^ t10; \ - t12 = g & e; \ - f = t9 ^ t12; \ - t14 = b | d; \ - t15 = t4 ^ t12; 
\ - h = t14 ^ t15 -*/ - -/* 17 terms */ - -#define ib3(a,b,c,d,e,f,g,h) \ - t1 = b ^ c; \ - t2 = b | c; \ - t3 = a ^ c; \ - t7 = a ^ d; \ - t4 = t2 ^ t3; \ - t5 = d | t4; \ - t9 = t2 ^ t7; \ - e = t1 ^ t5; \ - t8 = t1 | t5; \ - t11 = a & t4; \ - g = t8 ^ t9; \ - t12 = e | t9; \ - f = t11 ^ t12; \ - t14 = a & g; \ - t15 = t2 ^ t14; \ - t16 = e & t15; \ - h = t4 ^ t16 - -/* 15 terms */ - -#define sb4(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = d & t1; \ - t3 = c ^ t2; \ - t4 = b | t3; \ - h = t1 ^ t4; \ - t6 = ~b; \ - t7 = t1 | t6; \ - e = t3 ^ t7; \ - t9 = a & e; \ - t10 = t1 ^ t6; \ - t11 = t4 & t10; \ - g = t9 ^ t11; \ - t13 = a ^ t3; \ - t14 = t10 & g; \ - f = t13 ^ t14 - -/* 17 terms */ - -#define ib4(a,b,c,d,e,f,g,h) \ - t1 = c ^ d; \ - t2 = c | d; \ - t3 = b ^ t2; \ - t4 = a & t3; \ - f = t1 ^ t4; \ - t6 = a ^ d; \ - t7 = b | d; \ - t8 = t6 & t7; \ - h = t3 ^ t8; \ - t10 = ~a; \ - t11 = c ^ h; \ - t12 = t10 | t11;\ - e = t3 ^ t12; \ - t14 = c | t4; \ - t15 = t7 ^ t14; \ - t16 = h | t10; \ - g = t15 ^ t16 - -/* 16 terms */ - -#define sb5(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = a ^ d; \ - t4 = c ^ t1; \ - t5 = t2 | t3; \ - e = t4 ^ t5; \ - t7 = d & e; \ - t8 = t2 ^ e; \ - t10 = t1 | e; \ - f = t7 ^ t8; \ - t11 = t2 | t7; \ - t12 = t3 ^ t10; \ - t14 = b ^ t7; \ - g = t11 ^ t12; \ - t15 = f & t12; \ - h = t14 ^ t15 - -/* 16 terms */ - -#define ib5(a,b,c,d,e,f,g,h) \ - t1 = ~c; \ - t2 = b & t1; \ - t3 = d ^ t2; \ - t4 = a & t3; \ - t5 = b ^ t1; \ - h = t4 ^ t5; \ - t7 = b | h; \ - t8 = a & t7; \ - f = t3 ^ t8; \ - t10 = a | d; \ - t11 = t1 ^ t7; \ - e = t10 ^ t11; \ - t13 = a ^ c; \ - t14 = b & t10; \ - t15 = t4 | t13; \ - g = t14 ^ t15 - -/* 15 terms */ - -#define sb6(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ d; \ - t3 = b ^ t2; \ - t4 = t1 | t2; \ - t5 = c ^ t4; \ - f = b ^ t5; \ - t13 = ~t5; \ - t7 = t2 | f; \ - t8 = d ^ t7; \ - t9 = t5 & t8; \ - g = t3 ^ t9; \ - t11 = t5 ^ t8; \ - e = g ^ t11; \ - t14 = t3 & t11; \ - h = t13 ^ t14 - -/* 15 terms 
*/ - -#define ib6(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = c ^ t2; \ - t4 = c | t1; \ - t5 = d ^ t4; \ - t13 = d & t1; \ - f = t3 ^ t5; \ - t7 = t3 & t5; \ - t8 = t2 ^ t7; \ - t9 = b | t8; \ - h = t5 ^ t9; \ - t11 = b | h; \ - e = t8 ^ t11; \ - t14 = t3 ^ t11; \ - g = t13 ^ t14 - -/* 17 terms */ - -#define sb7(a,b,c,d,e,f,g,h) \ - t1 = ~c; \ - t2 = b ^ c; \ - t3 = b | t1; \ - t4 = d ^ t3; \ - t5 = a & t4; \ - t7 = a ^ d; \ - h = t2 ^ t5; \ - t8 = b ^ t5; \ - t9 = t2 | t8; \ - t11 = d & t3; \ - f = t7 ^ t9; \ - t12 = t5 ^ f; \ - t15 = t1 | t4; \ - t13 = h & t12; \ - g = t11 ^ t13; \ - t16 = t12 ^ g; \ - e = t15 ^ t16 - -/* 17 terms */ - -#define ib7(a,b,c,d,e,f,g,h) \ - t1 = a & b; \ - t2 = a | b; \ - t3 = c | t1; \ - t4 = d & t2; \ - h = t3 ^ t4; \ - t6 = ~d; \ - t7 = b ^ t4; \ - t8 = h ^ t6; \ - t11 = c ^ t7; \ - t9 = t7 | t8; \ - f = a ^ t9; \ - t12 = d | f; \ - e = t11 ^ t12; \ - t14 = a & h; \ - t15 = t3 ^ f; \ - t16 = e ^ t14; \ - g = t15 ^ t16 - -#define k_xor(r,a,b,c,d) \ - a ^= ks[4 * r + 8]; \ - b ^= ks[4 * r + 9]; \ - c ^= ks[4 * r + 10]; \ - d ^= ks[4 * r + 11] - -#define k_set(r,a,b,c,d) \ - a = ks[4 * r + 8]; \ - b = ks[4 * r + 9]; \ - c = ks[4 * r + 10]; \ - d = ks[4 * r + 11] - -#define k_get(r,a,b,c,d) \ - ks[4 * r + 8] = a; \ - ks[4 * r + 9] = b; \ - ks[4 * r + 10] = c; \ - ks[4 * r + 11] = d - -/* the linear transformation and its inverse */ - -#define rot(a,b,c,d) \ - a = rotl32(a, 13); \ - c = rotl32(c, 3); \ - d ^= c ^ (a << 3); \ - b ^= a ^ c; \ - d = rotl32(d, 7); \ - b = rotl32(b, 1); \ - a ^= b ^ d; \ - c ^= d ^ (b << 7); \ - a = rotl32(a, 5); \ - c = rotl32(c, 22) - -#define irot(a,b,c,d) \ - c = rotr32(c, 22); \ - a = rotr32(a, 5); \ - c ^= d ^ (b << 7); \ - a ^= b ^ d; \ - d = rotr32(d, 7); \ - b = rotr32(b, 1); \ - d ^= c ^ (a << 3); \ - b ^= a ^ c; \ - c = rotr32(c, 3); \ - a = rotr32(a, 13) - -__device__ static void serpent256_set_key (u32 *ks, const u32 *ukey) -{ - #pragma unroll - for (int i = 0; i < 8; i++) - { - 
ks[i] = ukey[i]; - } - - #pragma unroll - for (int i = 0; i < 132; i++) - { - ks[i + 8] = rotl32 (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11); - } - - u32 a,b,c,d,e,f,g,h; - u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; - - k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h); - k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h); - k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h); - k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h); - k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h); - k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h); - k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h); - k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h); - k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h); - k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h); - k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h); - k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h); - k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h); - k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h); - k_set(14,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h); - k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h); - k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h); - k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h); - k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h); - k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h); - k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h); - k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h); - k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h); - k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h); - k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h); - k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h); - k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h); - k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h); - 
k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h); - k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h); - k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h); - k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h); - k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h); -} - -__device__ static void serpent256_encrypt (const u32 *ks, const u32 *in, u32 *out) -{ - u32 a,b,c,d,e,f,g,h; - u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; - - a = in[0]; - b = in[1]; - c = in[2]; - d = in[3]; - - k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(25,e,f,g,h); 
sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d); - k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h); - k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d); - k_xor(32,a,b,c,d); - - out[0] = a; - out[1] = b; - out[2] = c; - out[3] = d; -} - -__device__ static void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out) -{ - u32 a,b,c,d,e,f,g,h; - u32 t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16; - - a = in[0]; - b = in[1]; - c = in[2]; - d = in[3]; - - k_xor(32,a,b,c,d); - ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h); - irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d); - irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h); - irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d); - irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h); - irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d); - irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h); - irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d); - irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h); - irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d); - irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h); - irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d); - irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h); - irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d); - irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h); - irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d); - irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h); - irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d); - irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h); - irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d); - irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h); - irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d); - irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); 
k_xor( 9,e,f,g,h); - irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d); - irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h); - irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d); - irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h); - irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d); - irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h); - irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d); - irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h); - irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d); - - out[0] = a; - out[1] = b; - out[2] = c; - out[3] = d; -} - -__device__ static void serpent256_decrypt_xts (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out) -{ - u32 T[4] = { 0 }; - u32 Z[4] = { 0 }; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - - u32 ks[140]; - - serpent256_set_key (ks, ukey2); - serpent256_encrypt (ks, Z, T); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; - - serpent256_set_key (ks, ukey1); - serpent256_decrypt (ks, out, out); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; -} diff --git a/nv/gpu_twofish256_nv.c b/nv/gpu_twofish256_nv.c deleted file mode 100644 index fcc7ee9..0000000 --- a/nv/gpu_twofish256_nv.c +++ /dev/null @@ -1,466 +0,0 @@ -/* This is an independent implementation of the encryption algorithm: */ -/* */ -/* Twofish by Bruce Schneier and colleagues */ -/* */ -/* which is a candidate algorithm in the Advanced Encryption Standard */ -/* programme of the US National Institute of Standards and Technology. */ -/* */ -/* Copyright in this implementation is held by Dr B R Gladman but I */ -/* hereby give permission for its free direct or derivative use subject */ -/* to acknowledgment of its origin and compliance with any conditions */ -/* that the originators of t he algorithm place on its exploitation. 
*/ -/* */ -/* My thanks to Doug Whiting and Niels Ferguson for comments that led */ -/* to improvements in this implementation. */ -/* */ -/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */ -/* */ -/* -------------------------------------------------------------------- */ -/* */ -/* Cleaned and optimized for GPU use with oclHashcat by Jens Steube */ - -#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) - -__device__ __constant__ u32 q_tab[2][256] = -{ - { - 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78, - 0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C, - 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30, - 0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82, - 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE, - 0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B, - 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45, - 0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7, - 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF, - 0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8, - 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED, - 0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90, - 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B, - 0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B, - 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F, - 0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A, - 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17, - 0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72, - 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68, - 0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4, - 0xCA, 0x10, 0x21, 0xF0, 0xD3, 
0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42, - 0x4A, 0x5E, 0xC1, 0xE0 - }, - { - 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B, - 0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1, - 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B, - 0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5, - 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54, - 0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96, - 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7, - 0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8, - 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF, - 0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9, - 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D, - 0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E, - 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21, - 0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01, - 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E, - 0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64, - 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44, - 0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E, - 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B, - 0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9, - 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56, - 0x55, 0x09, 0xBE, 0x91 - } -}; - -#define q(n,x) q_tab[n][x] - -__device__ __constant__ u32 m_tab[4][256] = -{ - { 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, - 0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B, - 0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32, - 0x8282EED8, 0x525298FD, 
0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1, - 0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA, - 0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B, - 0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1, - 0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5, - 0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490, - 0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154, - 0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0, - 0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796, - 0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228, - 0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7, - 0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3, - 0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8, - 0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477, - 0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF, - 0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C, - 0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9, - 0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA, - 0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D, - 0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72, - 0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E, - 0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76, - 0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321, - 0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39, - 0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01, - 0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D, - 0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E, - 0x5A5A6678, 0x65654B5C, 
0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5, - 0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64, - 0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7, - 0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544, - 0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E, - 0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E, - 0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A, - 0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B, - 0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2, - 0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9, - 0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504, - 0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756, - 0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91 }, - - { 0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252, - 0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A, - 0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020, - 0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141, - 0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444, - 0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424, - 0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A, - 0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757, - 0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383, - 0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A, - 0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9, - 0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656, - 0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1, - 0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898, - 0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 
0x66CBF8F8, 0xCCFF9999, 0x95EA1414, - 0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3, - 0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1, - 0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989, - 0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5, - 0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282, - 0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E, - 0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E, - 0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202, - 0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC, - 0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565, - 0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A, - 0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808, - 0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272, - 0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A, - 0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969, - 0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505, - 0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5, - 0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D, - 0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343, - 0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF, - 0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3, - 0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F, - 0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646, - 0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6, - 0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF, - 0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A, - 0x0FE25151, 0x00000000, 0x6F9A1919, 
0x9DE01A1A, 0x368F9494, 0x42E6C7C7, - 0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8 }, - - { 0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B, - 0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F, - 0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A, - 0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783, - 0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70, - 0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3, - 0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB, - 0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA, - 0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4, - 0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41, - 0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C, - 0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07, - 0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622, - 0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18, - 0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035, - 0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96, - 0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84, - 0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E, - 0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F, - 0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD, - 0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558, - 0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40, - 0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA, - 0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85, - 0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF, - 0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 
0x16EE1661, 0xD721D773, - 0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D, - 0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B, - 0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C, - 0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19, - 0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086, - 0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D, - 0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74, - 0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755, - 0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691, - 0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D, - 0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4, - 0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53, - 0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E, - 0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9, - 0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705, - 0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7, - 0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF }, - - { 0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98, - 0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866, - 0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643, - 0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77, - 0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9, - 0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C, - 0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3, - 0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216, - 0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F, - 0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 
0x257ABE25, - 0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF, - 0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7, - 0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4, - 0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E, - 0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA, - 0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C, - 0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12, - 0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A, - 0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D, - 0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE, - 0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A, - 0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C, - 0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B, - 0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4, - 0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B, - 0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3, - 0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE, - 0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB, - 0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85, - 0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA, - 0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E, - 0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8, - 0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33, - 0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC, - 0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718, - 0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA, - 0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 
0xE85F07E8, - 0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872, - 0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882, - 0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D, - 0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10, - 0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6, - 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 } -}; - -#define mds(n,x) m_tab[n][x] - -__device__ static u32 h_fun (u32 *sk, u32 *lk, const u32 x, const u32 *key) -{ - u32 b0, b1, b2, b3; - - b0 = extract_byte (x, 0); - b1 = extract_byte (x, 1); - b2 = extract_byte (x, 2); - b3 = extract_byte (x, 3); - - b0 = q (1, b0) ^ extract_byte (key[3], 0); - b1 = q (0, b1) ^ extract_byte (key[3], 1); - b2 = q (0, b2) ^ extract_byte (key[3], 2); - b3 = q (1, b3) ^ extract_byte (key[3], 3); - - b0 = q (1, b0) ^ extract_byte (key[2], 0); - b1 = q (1, b1) ^ extract_byte (key[2], 1); - b2 = q (0, b2) ^ extract_byte (key[2], 2); - b3 = q (0, b3) ^ extract_byte (key[2], 3); - - b0 = q (0, (q (0, b0) ^ extract_byte (key[1], 0))) ^ extract_byte (key[0], 0); - b1 = q (0, (q (1, b1) ^ extract_byte (key[1], 1))) ^ extract_byte (key[0], 1); - b2 = q (1, (q (0, b2) ^ extract_byte (key[1], 2))) ^ extract_byte (key[0], 2); - b3 = q (1, (q (1, b3) ^ extract_byte (key[1], 3))) ^ extract_byte (key[0], 3); - - return mds (0, b0) ^ mds (1, b1) ^ mds (2, b2) ^ mds (3, b3); -} - -#define q40(x,k) q (0, q (0, q (1, q (1, x) ^ extract_byte (k[3], 0)) ^ extract_byte (k[2], 0)) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0) -#define q41(x,k) q (0, q (1, q (1, q (0, x) ^ extract_byte (k[3], 1)) ^ extract_byte (k[2], 1)) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1) -#define q42(x,k) q (1, q (0, q (0, q (0, x) ^ extract_byte (k[3], 2)) ^ extract_byte (k[2], 2)) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2) -#define q43(x,k) q (1, q (1, q (0, q (1, x) ^ extract_byte (k[3], 3)) ^ extract_byte (k[2], 3)) ^ 
extract_byte (k[1], 3)) ^ extract_byte (k[0], 3) - -#define g1_fun(x) \ - (mds (0, q40 (extract_byte (x, 3), sk)) ^ \ - mds (1, q41 (extract_byte (x, 0), sk)) ^ \ - mds (2, q42 (extract_byte (x, 1), sk)) ^ \ - mds (3, q43 (extract_byte (x, 2), sk))) - -#define g0_fun(x) \ - (mds (0, q40 (extract_byte (x, 0), sk)) ^ \ - mds (1, q41 (extract_byte (x, 1), sk)) ^ \ - mds (2, q42 (extract_byte (x, 2), sk)) ^ \ - mds (3, q43 (extract_byte (x, 3), sk))) - -__device__ static u32 mds_rem (u32 p0, u32 p1) -{ - #define G_MOD 0x14d - - for (int i = 0; i < 8; i++) - { - u32 t = p1 >> 24; - - p1 = (p1 << 8) | (p0 >> 24); - - p0 <<= 8; - - u32 u = (t << 1); - - if (t & 0x80) u ^= G_MOD; - - p1 ^= t ^ (u << 16); - - u ^= (t >> 1); - - if (t & 0x01) u ^= G_MOD >> 1; - - p1 ^= (u << 24) | (u << 8); - } - - return p1; -} - -__device__ static void twofish256_set_key (u32 *sk, u32 *lk, const u32 *ukey) -{ - u32 me_key[4]; - - me_key[0] = ukey[0]; - me_key[1] = ukey[2]; - me_key[2] = ukey[4]; - me_key[3] = ukey[6]; - - u32 mo_key[4]; - - mo_key[0] = ukey[1]; - mo_key[1] = ukey[3]; - mo_key[2] = ukey[5]; - mo_key[3] = ukey[7]; - - sk[3] = mds_rem (me_key[0], mo_key[0]); - sk[2] = mds_rem (me_key[1], mo_key[1]); - sk[1] = mds_rem (me_key[2], mo_key[2]); - sk[0] = mds_rem (me_key[3], mo_key[3]); - - for (int i = 0; i < 40; i += 2) - { - u32 a = 0x01010101 * i; - u32 b = 0x01010101 + a; - - a = h_fun (sk, lk, a, me_key); - b = h_fun (sk, lk, b, mo_key); - - b = rotl32 (b, 8); - - lk[i + 0] = a + b; - lk[i + 1] = rotl32 (a + 2 * b, 9); - } -} - -#define f_rnd(i) \ -{ \ - u32 t0 = g0_fun (data[0]); \ - u32 t1 = g1_fun (data[1]); \ - data[2] = rotr32 (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ - data[3] = rotl32 (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ - u32 t2 = g0_fun (data[2]); \ - u32 t3 = g1_fun (data[3]); \ - data[0] = rotr32 (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ - data[1] = rotl32 (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ -} - -__device__ static void 
twofish256_encrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) -{ - u32 data[4]; - - data[0] = in[0] ^ lk[0]; - data[1] = in[1] ^ lk[1]; - data[2] = in[2] ^ lk[2]; - data[3] = in[3] ^ lk[3]; - - f_rnd (0); - f_rnd (1); - f_rnd (2); - f_rnd (3); - f_rnd (4); - f_rnd (5); - f_rnd (6); - f_rnd (7); - - out[0] = data[2] ^ lk[4]; - out[1] = data[3] ^ lk[5]; - out[2] = data[0] ^ lk[6]; - out[3] = data[1] ^ lk[7]; -} - -#define i_rnd(i) \ -{ \ - u32 t0 = g0_fun (data[0]); \ - u32 t1 = g1_fun (data[1]); \ - data[2] = rotl32 (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ - data[3] = rotr32 (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ - u32 t2 = g0_fun (data[2]); \ - u32 t3 = g1_fun (data[3]); \ - data[0] = rotl32 (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ - data[1] = rotr32 (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ -} - -__device__ static void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u32 *out) -{ - u32 data[4]; - - data[0] = in[0] ^ lk[4]; - data[1] = in[1] ^ lk[5]; - data[2] = in[2] ^ lk[6]; - data[3] = in[3] ^ lk[7]; - - i_rnd (7); - i_rnd (6); - i_rnd (5); - i_rnd (4); - i_rnd (3); - i_rnd (2); - i_rnd (1); - i_rnd (0); - - out[0] = data[2] ^ lk[0]; - out[1] = data[3] ^ lk[1]; - out[2] = data[0] ^ lk[2]; - out[3] = data[1] ^ lk[3]; -} - -__device__ static void twofish256_decrypt_xts (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out) -{ - u32 T[4] = { 0 }; - u32 Z[4] = { 0 }; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - - u32 sk[4]; u32 lk[40]; - - twofish256_set_key (sk, lk, ukey2); - twofish256_encrypt (sk, lk, Z, T); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; - - twofish256_set_key (sk, lk, ukey1); - twofish256_decrypt (sk, lk, out, out); - - out[0] ^= T[0]; - out[1] ^= T[1]; - out[2] ^= T[2]; - out[3] ^= T[3]; -} diff --git a/nv/m00000_a0.cu b/nv/m00000_a0.cu deleted file mode 100644 index e9c7c10..0000000 --- a/nv/m00000_a0.cu +++ /dev/null @@ 
-1,392 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = 
pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - 
MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, 
w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x 
w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, 
d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); 
- - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ -} diff --git a/nv/m00000_a1.cu b/nv/m00000_a1.cu deleted file mode 100644 index 8518773..0000000 --- a/nv/m00000_a1.cu +++ /dev/null @@ -1,494 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if 
(gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = 
wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP 
(MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m08 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - 
wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], 
MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I 
, d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00000_a3.cu b/nv/m00000_a3.cu deleted file mode 100644 index 3ac7d75..0000000 --- a/nv/m00000_a3.cu +++ /dev/null @@ -1,703 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= f (b, c, d); \ - a -= x; \ - a -= t; \ -} - -#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= x; \ - a -= t; \ -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00000m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - 
const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, 
a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - 
MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00000s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const 
u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; 
- const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - - const u32x pre_cd = c_rev ^ d_rev; - - MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); - MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, 
w[15], MD5C2e, MD5S22); - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, 
c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - - bool q_cond = (pre_c != c); - - if (q_cond) continue; - - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m04 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 
1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - 
w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00010_a0.cu b/nv/m00010_a0.cu deleted file mode 100644 index 0b2a1ef..0000000 --- a/nv/m00010_a0.cu +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 
0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - 
MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = 
MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], 
MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const 
void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00010_a1.cu b/nv/m00010_a1.cu deleted file mode 100644 index 086de70..0000000 --- a/nv/m00010_a1.cu +++ /dev/null @@ -1,614 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" 
-#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = 
pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - 
w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, 
MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, 
c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - 
w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, 
MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, 
c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00010_a3.cu b/nv/m00010_a3.cu deleted file mode 100644 index 034463f..0000000 --- a/nv/m00010_a3.cu +++ /dev/null @@ -1,759 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= f (b, c, d); \ - a -= x; \ - a -= t; \ -} - -#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= x; \ - a -= t; \ -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00010m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= salt_buf0[0]; - w[ 1] |= salt_buf0[1]; - w[ 2] |= salt_buf0[2]; - w[ 3] |= salt_buf0[3]; - w[ 4] |= salt_buf1[0]; - w[ 5] |= salt_buf1[1]; - w[ 6] |= salt_buf1[2]; - w[ 7] |= salt_buf1[3]; - w[ 8] |= salt_buf2[0]; - w[ 9] |= salt_buf2[1]; - w[10] |= salt_buf2[2]; - w[11] |= salt_buf2[3]; - w[12] |= salt_buf3[0]; - w[13] |= salt_buf3[1]; - w[14] |= salt_buf3[2]; - w[15] |= salt_buf3[3]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[14] = pw_salt_len * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 
F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - 
const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - 
MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00010s (u32 w[16], const u32 pw_len, 
const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const 
u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, 
w[11], MD5C3d, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - - const u32x pre_cd = c_rev ^ d_rev; - - MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); - MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, 
d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - - bool q_cond = (pre_c != c); - - if (q_cond) continue; - - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); 
- MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= 
gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - 
w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - 
- m00010m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00010_s16 (const pw_t *pws, 
const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00010s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00020_a0.cu b/nv/m00020_a0.cu deleted file mode 100644 index 568d90b..0000000 --- a/nv/m00020_a0.cu +++ /dev/null @@ -1,506 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define 
_MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x 
pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - 
w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, 
MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - 
-extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, 
const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = 
pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, 
MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m00020_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00020_a1.cu b/nv/m00020_a1.cu deleted file mode 100644 index 41a52f7..0000000 --- a/nv/m00020_a1.cu +++ /dev/null @@ -1,602 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - 
- for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= 
salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP 
(MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, 
w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * 
digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = 
w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - 
MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, 
a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00020_a3.cu b/nv/m00020_a3.cu deleted file mode 100644 index faf9a69..0000000 --- a/nv/m00020_a3.cu +++ /dev/null @@ -1,728 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00020m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * 
modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 
- */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, 
w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00020s (u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos 
= 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], 
MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - 
u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = 
pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = 
pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00020_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00020s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00030_a0.cu b/nv/m00030_a0.cu deleted file mode 100644 index b5eb7fb..0000000 --- a/nv/m00030_a0.cu +++ /dev/null @@ -1,558 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - 
const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = 
(out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP 
(MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - 
const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, 
b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], 
MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s08 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00030_a1.cu b/nv/m00030_a1.cu deleted file mode 100644 index b8e6e7f..0000000 --- a/nv/m00030_a1.cu +++ /dev/null @@ -1,652 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define 
DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - 
wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - 
const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, 
a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - 
wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - 
s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP 
(MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, 
w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00030_a3.cu b/nv/m00030_a3.cu deleted file mode 100644 index 6900bc0..0000000 --- a/nv/m00030_a3.cu +++ /dev/null @@ -1,755 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define MD5_STEP_REV(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= f (b, c, d); \ - a -= x; \ - a -= t; \ -} - -#define MD5_STEP_REV1(f,a,b,c,d,x,t,s) \ -{ \ - a -= b; \ - a = rotr32 (a, s); \ - a -= x; \ - a -= t; \ -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00030m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, 
salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= salt_buf0[0]; - w[ 1] |= salt_buf0[1]; - w[ 2] |= salt_buf0[2]; - w[ 3] |= salt_buf0[3]; - w[ 4] |= salt_buf1[0]; - w[ 5] |= salt_buf1[1]; - w[ 6] |= salt_buf1[2]; - w[ 7] |= salt_buf1[3]; - w[ 8] |= salt_buf2[0]; - w[ 9] |= salt_buf2[1]; - w[10] |= salt_buf2[2]; - w[11] |= salt_buf2[3]; - w[12] |= salt_buf3[0]; - w[13] |= salt_buf3[1]; - w[14] |= salt_buf3[2]; - w[15] |= salt_buf3[3]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[14] = pw_salt_len * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + 
MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - 
MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, 
I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00030s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 
F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - 
const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 9], MD5C3f, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 2], MD5C3e, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[11], MD5C3d, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 4], MD5C3c, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[13], MD5C3b, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[ 6], MD5C3a, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[15], MD5C39, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[ 8], MD5C38, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 1], MD5C37, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[10], MD5C36, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 3], MD5C35, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, w[12], MD5C34, MD5S30); - MD5_STEP_REV (MD5_I, b_rev, c_rev, d_rev, a_rev, w[ 5], MD5C33, MD5S33); - MD5_STEP_REV (MD5_I, c_rev, d_rev, a_rev, b_rev, w[14], MD5C32, MD5S32); - MD5_STEP_REV (MD5_I, d_rev, a_rev, b_rev, c_rev, w[ 7], MD5C31, MD5S31); - MD5_STEP_REV (MD5_I, a_rev, b_rev, c_rev, d_rev, 0, MD5C30, MD5S30); - - const u32x pre_cd = c_rev ^ d_rev; - - MD5_STEP_REV1(MD5_H, b_rev, c_rev, d_rev, a_rev, w[ 2], MD5C2f, MD5S23); - MD5_STEP_REV1(MD5_H, c_rev, d_rev, a_rev, b_rev, w[15], MD5C2e, MD5S22); - - /** - * loop - */ - - const u32 bf_loops = ceil 
((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x pre_d = d_rev; - const u32x pre_a = a_rev - w0; - const u32x pre_b = b_rev - (pre_a ^ pre_cd); - const u32x pre_c = c_rev - (pre_a ^ pre_b ^ pre_d); - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, 
b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22); - - bool q_cond = (pre_c != c); - - if (q_cond) continue; - - MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = 
pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00030_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - 
w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00030s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00040_a0.cu b/nv/m00040_a0.cu deleted file mode 100644 index e657a3c..0000000 --- a/nv/m00040_a0.cu +++ /dev/null @@ -1,486 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - 
const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - 
MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, 
b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[2] = out_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], 
MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - 
MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00040_a1.cu b/nv/m00040_a1.cu deleted file mode 100644 index 2492f09..0000000 --- a/nv/m00040_a1.cu +++ /dev/null @@ -1,580 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - 
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - 
append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP 
(MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - 
#include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - 
- u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m00040_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00040_a3.cu b/nv/m00040_a3.cu deleted file mode 100644 index 46b2856..0000000 --- a/nv/m00040_a3.cu +++ /dev/null @@ -1,724 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef 
VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00040m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, 
b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00040s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - 
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, 
w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, 
void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00040_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00040s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00050_a0.cu b/nv/m00050_a0.cu deleted file mode 100644 index 507a26d..0000000 --- a/nv/m00050_a0.cu +++ /dev/null @@ -1,596 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 
-#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, 
MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - 
opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = 
pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] 
= 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, 
opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00050_a1.cu b/nv/m00050_a1.cu deleted file mode 100644 index 92f1784..0000000 --- a/nv/m00050_a1.cu +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, 
w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, 
MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 
0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, 
u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = 
salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x 
opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, 
c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, 
void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00050_a3.cu b/nv/m00050_a3.cu deleted file mode 100644 index 1948aa5..0000000 --- a/nv/m00050_a3.cu +++ /dev/null @@ -1,766 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include 
"types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - 
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00050m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] 
= salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00050s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, 
const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - salt_buf3[2] = salt_bufs[salt_pos].salt_buf[14]; - salt_buf3[3] = salt_bufs[salt_pos].salt_buf[15]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + salt_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = 
pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - 
w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - 
- const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00050_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00050s (w0, w1, w2, w3, pw_len, 
pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00060_a0.cu b/nv/m00060_a0.cu deleted file mode 100644 index 9f43842..0000000 --- a/nv/m00060_a0.cu +++ /dev/null @@ -1,568 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, 
b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, 
MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; 
- w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * 
loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + out_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m16 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - 
pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = 
w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + out_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00060_a1.cu b/nv/m00060_a1.cu deleted file mode 100644 index 95dcf93..0000000 --- a/nv/m00060_a1.cu +++ /dev/null @@ -1,674 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, 
c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, 
MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 
0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = 
c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = (64 + pw_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, 
const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - 
u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - 
w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = (64 + pw_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00060_a3.cu b/nv/m00060_a3.cu deleted file mode 100644 index b55d58b..0000000 --- a/nv/m00060_a3.cu +++ /dev/null @@ -1,738 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, 
c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, 
MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 
0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00060m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = (64 + pw_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00060s (u32x w0[4], u32x w1[4], u32x w2[4], 
u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = (64 + pw_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; 
- w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; 
- - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060m (w0, w1, w2, w3, pw_len, 
pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); 
-} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00060_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00060s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00100_a0.cu b/nv/m00100_a0.cu deleted file mode 100644 index fb31674..0000000 
--- a/nv/m00100_a0.cu +++ /dev/null @@ -1,488 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = 
pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - 
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); 
SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = 
pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ 
we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 
((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, 
wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00100_a1.cu b/nv/m00100_a1.cu deleted file mode 100644 index db0d411..0000000 --- a/nv/m00100_a1.cu +++ /dev/null @@ -1,598 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - 
*/ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - 
wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround 
(w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ 
we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ 
wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ 
w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const 
u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, 
wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t 
= rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - 
w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, 
e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00100_a3.cu b/nv/m00100_a3.cu deleted file mode 100644 index 88cacef..0000000 --- a/nv/m00100_a3.cu +++ /dev/null @@ -1,830 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00100m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = 
rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ 
c_67s ^ c_61s ^ c_59s), 1u); - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x 
w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP 
(SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - 
SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00100s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 
c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 
((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03; - - 
/** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, 
c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s 
^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - - SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - - bool q_cond = (e_rev != e); - - if (q_cond) continue; - - SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0); - - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ 
c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100m (w, pw_len, pws, rules_buf, combs_buf, 
words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, 
digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00110_a0.cu b/nv/m00110_a0.cu deleted file mode 100644 index 31a6341..0000000 --- a/nv/m00110_a0.cu +++ /dev/null @@ -1,640 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" 
-#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = 
salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] |= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround 
(w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t 
^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t 
^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ 
w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, 
const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = 
salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] |= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP 
(SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t 
^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 
((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git 
a/nv/m00110_a1.cu b/nv/m00110_a1.cu deleted file mode 100644 index d764a0f..0000000 --- a/nv/m00110_a1.cu +++ /dev/null @@ -1,706 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - 
u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ 
- - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP 
(SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, 
wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, 
w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, 
w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - 
wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 
salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, 
c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - 
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - 
w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - 
w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00110_a3.cu b/nv/m00110_a3.cu deleted file mode 100644 index 0f0b80e..0000000 --- a/nv/m00110_a3.cu +++ /dev/null @@ -1,887 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00110m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 
pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - 
const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = 
rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 
10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); 
- SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ 
w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00110s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 
c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 
((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03; - - 
/** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, 
c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s 
^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - - SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - - bool q_cond = (e_rev != e); - - if (q_cond) continue; - - SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0); - - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ 
c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110m (w, pw_len, pws, rules_buf, combs_buf, 
words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, 
digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00110_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00110s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00120_a0.cu b/nv/m00120_a0.cu deleted file mode 100644 index 286b033..0000000 --- a/nv/m00120_a0.cu +++ /dev/null @@ -1,598 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" 
-#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = 
salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = 
swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ 
w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - 
w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, 
b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ 
w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, 
u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - 
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = 
swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ 
w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ 
w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 
1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00120_a1.cu b/nv/m00120_a1.cu deleted file mode 100644 index 10e99ed..0000000 --- a/nv/m00120_a1.cu +++ /dev/null @@ -1,692 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - 
wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - 
w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ 
w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ 
w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, 
b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ 
w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset 
(wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | 
wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, 
d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ 
w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = 
rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const 
u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00120_a3.cu b/nv/m00120_a3.cu deleted file mode 100644 index dfe9b0d..0000000 --- a/nv/m00120_a3.cu +++ /dev/null @@ -1,996 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include 
"include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) -{ - switch (salt_len) - { - case 0: sw[0] = w0; - break; - case 1: sw[0] = (sw[0] & 0xff000000) | (w0 >> 8); - sw[1] = (sw[1] & 0x00ffffff) | (w0 << 24); - break; - case 2: sw[0] = (sw[0] & 0xffff0000) | (w0 >> 16); - sw[1] = (sw[1] & 0x0000ffff) | (w0 << 16); - break; - case 3: sw[0] = (sw[0] & 0xffffff00) | (w0 >> 24); - sw[1] = (sw[1] & 0x000000ff) | (w0 << 8); - break; - case 4: sw[1] = w0; - break; - case 5: sw[1] = (sw[1] & 0xff000000) | (w0 >> 8); - sw[2] = (sw[2] & 0x00ffffff) | (w0 << 24); - break; - case 6: sw[1] = (sw[1] & 0xffff0000) | (w0 >> 16); - sw[2] = (sw[2] & 0x0000ffff) | (w0 << 16); - break; - case 7: sw[1] = (sw[1] & 0xffffff00) | (w0 >> 24); - sw[2] = (sw[2] & 0x000000ff) | (w0 << 8); - break; - case 8: sw[2] = w0; - break; - case 9: sw[2] = (sw[2] & 0xff000000) | (w0 >> 8); - sw[3] = (sw[3] & 0x00ffffff) | (w0 << 24); - break; - case 10: sw[2] = (sw[2] & 0xffff0000) | (w0 >> 16); - sw[3] = (sw[3] & 0x0000ffff) | (w0 << 16); - break; - case 11: sw[2] = (sw[2] & 0xffffff00) | (w0 >> 24); - sw[3] = (sw[3] & 0x000000ff) | (w0 << 8); - break; - case 12: sw[3] = w0; - break; - case 13: sw[3] = (sw[3] & 0xff000000) | 
(w0 >> 8); - sw[4] = (sw[4] & 0x00ffffff) | (w0 << 24); - break; - case 14: sw[3] = (sw[3] & 0xffff0000) | (w0 >> 16); - sw[4] = (sw[4] & 0x0000ffff) | (w0 << 16); - break; - case 15: sw[3] = (sw[3] & 0xffffff00) | (w0 >> 24); - sw[4] = (sw[4] & 0x000000ff) | (w0 << 8); - break; - case 16: sw[4] = w0; - break; - case 17: sw[4] = (sw[4] & 0xff000000) | (w0 >> 8); - sw[5] = (sw[5] & 0x00ffffff) | (w0 << 24); - break; - case 18: sw[4] = (sw[4] & 0xffff0000) | (w0 >> 16); - sw[5] = (sw[5] & 0x0000ffff) | (w0 << 16); - break; - case 19: sw[4] = (sw[4] & 0xffffff00) | (w0 >> 24); - sw[5] = (sw[5] & 0x000000ff) | (w0 << 8); - break; - case 20: sw[5] = w0; - break; - case 21: sw[5] = (sw[5] & 0xff000000) | (w0 >> 8); - sw[6] = (sw[6] & 0x00ffffff) | (w0 << 24); - break; - case 22: sw[5] = (sw[5] & 0xffff0000) | (w0 >> 16); - sw[6] = (sw[6] & 0x0000ffff) | (w0 << 16); - break; - case 23: sw[5] = (sw[5] & 0xffffff00) | (w0 >> 24); - sw[6] = (sw[6] & 0x000000ff) | (w0 << 8); - break; - case 24: sw[6] = w0; - break; - case 25: sw[6] = (sw[6] & 0xff000000) | (w0 >> 8); - sw[7] = (sw[7] & 0x00ffffff) | (w0 << 24); - break; - case 26: sw[6] = (sw[6] & 0xffff0000) | (w0 >> 16); - sw[7] = (sw[7] & 0x0000ffff) | (w0 << 16); - break; - case 27: sw[6] = (sw[6] & 0xffffff00) | (w0 >> 24); - sw[7] = (sw[7] & 0x000000ff) | (w0 << 8); - break; - case 28: sw[7] = w0; - break; - case 29: sw[7] = (sw[7] & 0xff000000) | (w0 >> 8); - sw[8] = (sw[8] & 0x00ffffff) | (w0 << 24); - break; - case 30: sw[7] = (sw[7] & 0xffff0000) | (w0 >> 16); - sw[8] = (sw[8] & 0x0000ffff) | (w0 << 16); - break; - case 31: sw[7] = (sw[7] & 0xffffff00) | (w0 >> 24); - sw[8] = (sw[8] & 0x000000ff) | (w0 << 8); - break; - } -} - -__device__ static void m00120m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround 
(w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = swap_workaround (w3_t[2]); - w3_t[3] = swap_workaround (w3_t[3]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - const u32x w0n = w0l | w0r; - - u32x wx[16]; - - wx[ 0] = w0_t[0]; - wx[ 1] = w0_t[1]; - wx[ 2] = w0_t[2]; - wx[ 3] = w0_t[3]; - wx[ 4] = w1_t[0]; - wx[ 5] = w1_t[1]; - wx[ 6] = w1_t[2]; - wx[ 7] = w1_t[3]; - wx[ 8] = w2_t[0]; - wx[ 9] = w2_t[1]; - wx[10] = w2_t[2]; - wx[11] = w2_t[3]; - wx[12] = w3_t[0]; - wx[13] = w3_t[1]; - wx[14] = w3_t[2]; - wx[15] = w3_t[3]; - - overwrite_at (wx, w0n, salt_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wx[ 0]; - w0_t[1] = wx[ 1]; - w0_t[2] = wx[ 2]; - w0_t[3] = wx[ 3]; - w1_t[0] = wx[ 4]; - w1_t[1] = wx[ 5]; - w1_t[2] = wx[ 6]; - 
w1_t[3] = wx[ 7]; - w2_t[0] = wx[ 8]; - w2_t[1] = wx[ 9]; - w2_t[2] = wx[10]; - w2_t[3] = wx[11]; - w3_t[0] = wx[12]; - w3_t[1] = wx[13]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); 
SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ 
w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = 
rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, 
a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00120s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 
salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - 
w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = swap_workaround (w3_t[2]); - w3_t[3] = swap_workaround (w3_t[3]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - const u32x w0n = w0l | w0r; - - u32x wx[16]; - - wx[ 0] = w0_t[0]; - wx[ 1] = w0_t[1]; - wx[ 2] = w0_t[2]; - wx[ 3] = w0_t[3]; - wx[ 4] = w1_t[0]; - wx[ 5] = w1_t[1]; - wx[ 6] = w1_t[2]; - wx[ 7] = w1_t[3]; - wx[ 8] = w2_t[0]; - wx[ 9] = w2_t[1]; - wx[10] = w2_t[2]; - wx[11] = w2_t[3]; - wx[12] = w3_t[0]; - wx[13] = w3_t[1]; - wx[14] = w3_t[2]; - wx[15] = w3_t[3]; - - overwrite_at (wx, w0n, salt_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wx[ 0]; - w0_t[1] = wx[ 1]; - w0_t[2] = wx[ 2]; - w0_t[3] = wx[ 3]; - w1_t[0] = wx[ 4]; - w1_t[1] = wx[ 5]; - w1_t[2] = wx[ 6]; - w1_t[3] = wx[ 7]; - w2_t[0] = wx[ 8]; - w2_t[1] = wx[ 9]; - w2_t[2] = wx[10]; - w2_t[3] = wx[11]; - w3_t[0] = wx[12]; - w3_t[1] = wx[13]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, 
a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ 
w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, 
d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, 
const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - 
w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00120_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - 
w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00120s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00130_a0.cu b/nv/m00130_a0.cu deleted file mode 100644 index a658bce..0000000 --- a/nv/m00130_a0.cu +++ /dev/null @@ -1,654 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = 
apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - 
SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 
((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP 
(SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const 
u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - 
- w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - 
w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] 
^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = 
rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, 
w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - - if (e != e_rev) continue; - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ 
w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const 
u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00130_a1.cu b/nv/m00130_a1.cu deleted file mode 100644 index 22ceaa1..0000000 --- a/nv/m00130_a1.cu +++ /dev/null @@ -1,748 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - 
*/ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - 
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround 
(w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); 
SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ 
w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = 
rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, 
a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - 
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x 
w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP 
(SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - 
w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); 
SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 
((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - - if (e != e_rev) continue; - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s08 (const pw_t *pws, 
const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00130_a3.cu b/nv/m00130_a3.cu deleted file mode 100644 index 665e589..0000000 --- a/nv/m00130_a3.cu +++ /dev/null @@ -1,887 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 
3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00130m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 
7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ 
w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 
1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; 
- const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, 
a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ 
w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - 
SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00130s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 
4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const 
u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 
c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u) - SHA1C03; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const 
u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, 
e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - 
SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - - SHA1_STEP_PE (SHA1_F1, a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - - bool q_cond = (e_rev != e); - - if (q_cond) continue; - - SHA1_STEP_PB (SHA1_F1, a, b, c, d, e, 0); - - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x 
* blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = 
pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00130_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - 
- const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00130s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00140_a0.cu b/nv/m00140_a0.cu deleted file mode 100644 index 3b0ecff..0000000 --- a/nv/m00140_a0.cu +++ /dev/null @@ -1,582 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = (out_len * 2) + salt_len; 
- - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, 
e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] 
= rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP 
(SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ 
w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base 
- */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode 
(w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - w3_t[3] = out_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ 
w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = 
rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ 
w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00140_a1.cu b/nv/m00140_a1.cu deleted file mode 100644 index a56cee5..0000000 --- a/nv/m00140_a1.cu +++ /dev/null @@ -1,676 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t 
c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = 
salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = 
pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, 
c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ 
w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - 
w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, 
const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x 
wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + 
salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, 
c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 
((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, 
a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ 
w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s08 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00140_a3.cu b/nv/m00140_a3.cu deleted file mode 100644 index b94a99b..0000000 --- a/nv/m00140_a3.cu +++ /dev/null @@ -1,997 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define 
DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void overwrite_at (u32x sw[16], const u32x w0, const u32 salt_len) -{ - switch (salt_len) - { - case 0: sw[0] = w0; - break; - case 1: sw[0] = (sw[0] & 0xff000000) | (w0 >> 8); - sw[1] = (sw[1] & 0x00ffffff) | (w0 << 24); - break; - case 2: sw[0] = (sw[0] & 0xffff0000) | (w0 >> 16); - sw[1] = (sw[1] & 0x0000ffff) | (w0 << 16); - break; - case 3: sw[0] = (sw[0] & 0xffffff00) | (w0 >> 24); - sw[1] = (sw[1] & 0x000000ff) | (w0 << 8); - break; - case 4: sw[1] = w0; - break; - case 5: sw[1] = (sw[1] & 0xff000000) | (w0 >> 8); - sw[2] = (sw[2] & 0x00ffffff) | (w0 << 24); - break; - case 6: sw[1] = (sw[1] & 0xffff0000) | (w0 >> 16); - sw[2] = (sw[2] & 0x0000ffff) | (w0 << 16); - break; - case 7: sw[1] = (sw[1] & 0xffffff00) | (w0 >> 24); - sw[2] = (sw[2] & 0x000000ff) | (w0 << 8); - break; - case 8: sw[2] = w0; - break; - case 9: sw[2] = (sw[2] & 0xff000000) | (w0 >> 8); - sw[3] = (sw[3] & 0x00ffffff) | (w0 << 24); - break; - case 10: sw[2] = (sw[2] & 0xffff0000) | (w0 >> 16); - sw[3] = (sw[3] & 0x0000ffff) | (w0 << 16); - break; - case 11: sw[2] = (sw[2] & 0xffffff00) | (w0 >> 24); - sw[3] = (sw[3] & 0x000000ff) | (w0 << 8); - break; - case 12: sw[3] = w0; - break; - case 13: sw[3] = (sw[3] & 0xff000000) | (w0 >> 8); - sw[4] = (sw[4] & 0x00ffffff) | (w0 << 24); - break; - case 14: sw[3] = (sw[3] & 0xffff0000) | (w0 >> 16); - sw[4] = (sw[4] & 0x0000ffff) | (w0 << 16); - break; - case 15: sw[3] = 
(sw[3] & 0xffffff00) | (w0 >> 24); - sw[4] = (sw[4] & 0x000000ff) | (w0 << 8); - break; - case 16: sw[4] = w0; - break; - case 17: sw[4] = (sw[4] & 0xff000000) | (w0 >> 8); - sw[5] = (sw[5] & 0x00ffffff) | (w0 << 24); - break; - case 18: sw[4] = (sw[4] & 0xffff0000) | (w0 >> 16); - sw[5] = (sw[5] & 0x0000ffff) | (w0 << 16); - break; - case 19: sw[4] = (sw[4] & 0xffffff00) | (w0 >> 24); - sw[5] = (sw[5] & 0x000000ff) | (w0 << 8); - break; - case 20: sw[5] = w0; - break; - case 21: sw[5] = (sw[5] & 0xff000000) | (w0 >> 8); - sw[6] = (sw[6] & 0x00ffffff) | (w0 << 24); - break; - case 22: sw[5] = (sw[5] & 0xffff0000) | (w0 >> 16); - sw[6] = (sw[6] & 0x0000ffff) | (w0 << 16); - break; - case 23: sw[5] = (sw[5] & 0xffffff00) | (w0 >> 24); - sw[6] = (sw[6] & 0x000000ff) | (w0 << 8); - break; - case 24: sw[6] = w0; - break; - case 25: sw[6] = (sw[6] & 0xff000000) | (w0 >> 8); - sw[7] = (sw[7] & 0x00ffffff) | (w0 << 24); - break; - case 26: sw[6] = (sw[6] & 0xffff0000) | (w0 >> 16); - sw[7] = (sw[7] & 0x0000ffff) | (w0 << 16); - break; - case 27: sw[6] = (sw[6] & 0xffffff00) | (w0 >> 24); - sw[7] = (sw[7] & 0x000000ff) | (w0 << 8); - break; - case 28: sw[7] = w0; - break; - case 29: sw[7] = (sw[7] & 0xff000000) | (w0 >> 8); - sw[8] = (sw[8] & 0x00ffffff) | (w0 << 24); - break; - case 30: sw[7] = (sw[7] & 0xffff0000) | (w0 >> 16); - sw[8] = (sw[8] & 0x0000ffff) | (w0 << 16); - break; - case 31: sw[7] = (sw[7] & 0xffffff00) | (w0 >> 24); - sw[8] = (sw[8] & 0x000000ff) | (w0 << 8); - break; - } -} - -__device__ static void m00140m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - 
w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = swap_workaround (w3_t[2]); - w3_t[3] = swap_workaround (w3_t[3]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - const u32x w0n = w0l | w0r; - - u32x wx[16]; - - wx[ 0] = w0_t[0]; - wx[ 1] = w0_t[1]; - wx[ 2] = w0_t[2]; - wx[ 3] = w0_t[3]; - wx[ 4] = w1_t[0]; - wx[ 5] = w1_t[1]; - wx[ 6] = w1_t[2]; - wx[ 7] = w1_t[3]; - wx[ 8] = w2_t[0]; - wx[ 9] = w2_t[1]; - wx[10] = w2_t[2]; - wx[11] = w2_t[3]; - wx[12] = w3_t[0]; - wx[13] = w3_t[1]; - wx[14] = w3_t[2]; - wx[15] = w3_t[3]; - - overwrite_at (wx, w0n, salt_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wx[ 0]; - w0_t[1] = wx[ 1]; - w0_t[2] = wx[ 2]; - w0_t[3] = wx[ 3]; - w1_t[0] = wx[ 4]; - w1_t[1] = wx[ 5]; - w1_t[2] = wx[ 6]; - w1_t[3] = wx[ 7]; - w2_t[0] = wx[ 8]; - w2_t[1] = wx[ 9]; - w2_t[2] = wx[10]; - w2_t[3] = wx[11]; - w3_t[0] = wx[12]; - w3_t[1] = wx[13]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - 
/** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] 
^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - 
w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 
1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 
1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00140s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = 
salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround 
(w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = swap_workaround (w3_t[2]); - w3_t[3] = swap_workaround (w3_t[3]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - const u32x w0n = w0l | w0r; - - u32x wx[16]; - - wx[ 0] = w0_t[0]; - wx[ 1] = w0_t[1]; - wx[ 2] = w0_t[2]; - wx[ 3] = w0_t[3]; - wx[ 4] = w1_t[0]; - wx[ 5] = w1_t[1]; - wx[ 6] = w1_t[2]; - wx[ 7] = w1_t[3]; - wx[ 8] = w2_t[0]; - wx[ 9] = w2_t[1]; - wx[10] = w2_t[2]; - wx[11] = w2_t[3]; - wx[12] = w3_t[0]; - wx[13] = w3_t[1]; - wx[14] = w3_t[2]; - wx[15] = w3_t[3]; - - overwrite_at (wx, w0n, salt_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wx[ 0]; - w0_t[1] = wx[ 1]; - w0_t[2] = wx[ 2]; - w0_t[3] = wx[ 3]; - w1_t[0] = wx[ 4]; - w1_t[1] = wx[ 5]; - w1_t[2] = wx[ 6]; - w1_t[3] = wx[ 7]; - w2_t[0] = wx[ 8]; - w2_t[1] = wx[ 9]; - w2_t[2] = wx[10]; - w2_t[3] = wx[11]; - w3_t[0] = wx[12]; - w3_t[1] = wx[13]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP 
(SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ 
w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, 
e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ 
w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, 
u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = 
pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00140m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - 
u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 
0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00140_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - 
m00140s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00150_a0.cu b/nv/m00150_a0.cu deleted file mode 100644 index fd5ac23..0000000 --- a/nv/m00150_a0.cu +++ /dev/null @@ -1,600 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - 
u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, 
C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, 
B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, 
A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 
0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid 
= threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - 
w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - 
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = 
swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00150_a1.cu b/nv/m00150_a1.cu deleted file mode 100644 index 9311a47..0000000 --- a/nv/m00150_a1.cu +++ /dev/null @@ -1,706 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - 
SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ 
w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t 
^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ 
w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, 
w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - 
wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = 
wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = 
pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - 
w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00150_a3.cu b/nv/m00150_a3.cu deleted file mode 100644 index 4f0a897..0000000 --- a/nv/m00150_a3.cu +++ /dev/null @@ -1,770 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, 
D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t 
= rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t 
= rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = 
rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - 
opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00150m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - 
salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00150s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = 
w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - 
w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = 
pws[gid].pw_len; - - /** - * main - */ - - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00150m (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00150_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m00150_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00150s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00160_a0.cu b/nv/m00160_a0.cu deleted file mode 100644 index 40f56bb..0000000 --- 
a/nv/m00160_a0.cu +++ /dev/null @@ -1,600 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, 
C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 
1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ 
w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 
1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] 
= ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - 
w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + out_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m00160_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); 
- - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + out_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s08 (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00160_a1.cu b/nv/m00160_a1.cu deleted file mode 100644 index 96e7dd6..0000000 --- a/nv/m00160_a1.cu +++ /dev/null @@ -1,706 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 
-#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t 
^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += 
B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 
(64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x 
w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; 
- wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 
il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, 
w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00160_a3.cu b/nv/m00160_a3.cu deleted file mode 100644 index a5c4ef9..0000000 --- a/nv/m00160_a3.cu +++ /dev/null @@ -1,767 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - 
SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, 
digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m00160m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] 
= swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00160s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - 
w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m00160_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00160_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00160s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00190_a0.cu b/nv/m00190_a0.cu deleted file mode 100644 index f1d9685..0000000 --- a/nv/m00190_a0.cu +++ /dev/null @@ -1,513 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 4 -#define DGST_R2 3 -#define DGST_R3 2 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] 
= pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); 
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP 
(SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 
((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; 
- - #include VECT_COMPARE_M - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - 
* sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ 
w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ 
wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ 
w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s08 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00190_a1.cu b/nv/m00190_a1.cu deleted file mode 100644 index 27100af..0000000 --- a/nv/m00190_a1.cu +++ /dev/null @@ -1,623 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 4 -#define 
DGST_R2 3 -#define DGST_R3 2 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - 
wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround 
(w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, 
c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, 
d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, 
b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_M - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, 
wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP 
(SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t 
^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 
((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ 
w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00190_a3.cu b/nv/m00190_a3.cu deleted file mode 100644 index cd72f58..0000000 --- a/nv/m00190_a3.cu +++ /dev/null @@ -1,851 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 4 -#define DGST_R2 3 -#define DGST_R3 2 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00190m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ 
- /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s 
= rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ 
c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = 
rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - 
SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ 
w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_M - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_M - } - } -} - -__device__ static void m00190s (u32 w[16], 
const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - 
const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = 
rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * digest - */ - - const u32 search[4] = - { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, 
d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ 
w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); 
- - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - a += SHA1M_A; - e += SHA1M_E; - d += SHA1M_D; - c += SHA1M_C; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - - a &= 0x00000fff; - - { - const u32x r0 = a; - const u32x r1 = e; - const u32x r2 = d; - const u32x r3 = c; - - #include VECT_COMPARE_S - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = 
pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 
pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00190_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x 
*words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00190s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00200_a0.cu b/nv/m00200_a0.cu deleted file mode 100644 index f4590e8..0000000 --- a/nv/m00200_a0.cu +++ /dev/null @@ -1,361 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MYSQL323_ - -#include "include/constants.h" -#include 
"include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = 
pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = out_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) 
* (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) out_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = out_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00200_a1.cu b/nv/m00200_a1.cu deleted file mode 100644 index ae9b032..0000000 --- a/nv/m00200_a1.cu +++ /dev/null @@ -1,411 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MYSQL323_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w_t[16]; - - w_t[ 0] = wordl0[0] | wordr0[0]; - w_t[ 1] = wordl0[1] | wordr0[1]; - 
w_t[ 2] = wordl0[2] | wordr0[2]; - w_t[ 3] = wordl0[3] | wordr0[3]; - w_t[ 4] = wordl1[0] | wordr1[0]; - w_t[ 5] = wordl1[1] | wordr1[1]; - w_t[ 6] = wordl1[2] | wordr1[2]; - w_t[ 7] = wordl1[3] | wordr1[3]; - w_t[ 8] = wordl2[0] | wordr2[0]; - w_t[ 9] = wordl2[1] | wordr2[1]; - w_t[10] = wordl2[2] | wordr2[2]; - w_t[11] = wordl2[3] | wordr2[3]; - w_t[12] = wordl3[0] | wordr3[0]; - w_t[13] = wordl3[1] | wordr3[1]; - w_t[14] = wordl3[2] | wordr3[2]; - w_t[15] = 0; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = pw_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - 
if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w_t[16]; - - w_t[ 0] = wordl0[0] | wordr0[0]; - w_t[ 1] = wordl0[1] | wordr0[1]; - w_t[ 2] = wordl0[2] | wordr0[2]; - w_t[ 3] = wordl0[3] | wordr0[3]; - w_t[ 4] = wordl1[0] | wordr1[0]; - w_t[ 5] = wordl1[1] | wordr1[1]; - w_t[ 6] = wordl1[2] | 
wordr1[2]; - w_t[ 7] = wordl1[3] | wordr1[3]; - w_t[ 8] = wordl2[0] | wordr2[0]; - w_t[ 9] = wordl2[1] | wordr2[1]; - w_t[10] = wordl2[2] | wordr2[2]; - w_t[11] = wordl2[3] | wordr2[3]; - w_t[12] = wordl3[0] | wordr3[0]; - w_t[13] = wordl3[1] | wordr3[1]; - w_t[14] = wordl3[2] | wordr3[2]; - w_t[15] = 0; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - int i; - int j; - - for (i = 0, j = 0; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32x wj = w_t[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32x wj = w_t[j]; - - const u32 left = pw_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00200_a3.cu b/nv/m00200_a3.cu deleted file mode 100644 index 939ab68..0000000 --- a/nv/m00200_a3.cu +++ /dev/null @@ -1,490 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MYSQL323_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00200m (u32 w[16], const u32 pw_len, const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - if (pw_len >= 4) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - ROUND ((w0 >> 16) & 0xff); - ROUND ((w0 >> 24) & 0xff); - } - else if (pw_len == 3) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - ROUND ((w0 >> 16) & 0xff); - } - else if (pw_len == 2) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - } - else if (pw_len == 1) - { - ROUND ((w0 >> 0) & 0xff); - } - - int i; - int j; - - for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32 wj = w[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32 wj = w[j]; - - const u32 left = pw_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 
0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00200s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MYSQL323_A; - u32x b = MYSQL323_B; - - u32x add = 7; - - #define ROUND(v) \ - { \ - a ^= (((a & 0x3f) + add) * (v)) + (a << 8); \ - b += (b << 8) ^ a; \ - add += v; \ - } - - if (pw_len >= 4) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - 
ROUND ((w0 >> 16) & 0xff); - ROUND ((w0 >> 24) & 0xff); - } - else if (pw_len == 3) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - ROUND ((w0 >> 16) & 0xff); - } - else if (pw_len == 2) - { - ROUND ((w0 >> 0) & 0xff); - ROUND ((w0 >> 8) & 0xff); - } - else if (pw_len == 1) - { - ROUND ((w0 >> 0) & 0xff); - } - - int i; - int j; - - for (i = 4, j = 1; i <= (int) pw_len - 4; i += 4, j += 1) - { - const u32 wj = w[j]; - - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - ROUND ((wj >> 24) & 0xff); - } - - const u32 wj = w[j]; - - const u32 left = pw_len - i; - - if (left == 3) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - ROUND ((wj >> 16) & 0xff); - } - else if (left == 2) - { - ROUND ((wj >> 0) & 0xff); - ROUND ((wj >> 8) & 0xff); - } - else if (left == 1) - { - ROUND ((wj >> 0) & 0xff); - } - - a &= 0x7fffffff; - b &= 0x7fffffff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = 
pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - 
w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, 
tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00200s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00300_a0.cu b/nv/m00300_a0.cu deleted file mode 100644 index 9c8a6d3..0000000 --- a/nv/m00300_a0.cu +++ /dev/null @@ -1,738 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef 
VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - 
pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP 
(SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, 
a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP 
(SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - w0_t = a; - w1_t = b; - w2_t = c; - w3_t = d; - w4_t = e; - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP 
(SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP 
(SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); 
SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = 
swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = 
rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = 
rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = 
rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - w0_t = a; - w1_t = b; - w2_t = c; - w3_t = d; - w4_t = e; - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, 
wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 
1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 
((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00300_a1.cu b/nv/m00300_a1.cu deleted file mode 100644 index 5f00e67..0000000 --- a/nv/m00300_a1.cu +++ /dev/null @@ -1,848 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 
gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | 
wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t 
^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ 
w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - w0_t = a; - w1_t = b; - w2_t = c; - w3_t = d; - w4_t = e; - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 
0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, 
e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, 
e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, 
d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m16 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - 
wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | 
wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t 
^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ 
w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - w0_t = a; - w1_t = b; - w2_t = c; - w3_t = d; - w4_t = e; - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 
0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, 
e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, 
e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, 
d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s16 
(const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00300_a3.cu b/nv/m00300_a3.cu deleted file mode 100644 index b0a2558..0000000 --- a/nv/m00300_a3.cu +++ /dev/null @@ -1,1078 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00300m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 
c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s ^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 
((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ c_63s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = 
w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, 
d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ 
w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ 
w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x w0_t = a; - u32x w1_t = b; - u32x w2_t = c; - u32x w3_t = d; - u32x w4_t = e; - u32x w5_t = 0x80000000; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP 
(SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); 
SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00300s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 c_16s = rotl32 ((w[13] ^ w[ 8] ^ w[ 2] ), 1u); - const u32 c_17s = rotl32 ((w[14] ^ w[ 9] ^ w[ 3] ^ w[ 1]), 1u); - const u32 c_18s = rotl32 ((w[15] ^ w[10] ^ w[ 4] ^ w[ 2]), 1u); - const u32 c_19s = rotl32 ((c_16s ^ w[11] ^ w[ 5] ^ w[ 3]), 1u); - const u32 c_20s = rotl32 ((c_17s ^ w[12] ^ w[ 6] ^ w[ 4]), 1u); - const u32 c_21s = rotl32 ((c_18s ^ w[13] ^ w[ 7] ^ w[ 5]), 1u); - const u32 c_22s = rotl32 ((c_19s ^ w[14] ^ w[ 8] ^ w[ 6]), 1u); - const u32 c_23s = rotl32 ((c_20s ^ w[15] ^ w[ 9] ^ w[ 7]), 1u); - const u32 c_24s = rotl32 ((c_21s ^ c_16s ^ w[10] ^ w[ 8]), 1u); - const u32 c_25s = rotl32 ((c_22s ^ c_17s ^ w[11] ^ w[ 9]), 1u); - const u32 c_26s = rotl32 ((c_23s ^ c_18s ^ w[12] ^ w[10]), 1u); - const u32 c_27s = rotl32 ((c_24s ^ c_19s ^ w[13] ^ w[11]), 1u); - const u32 c_28s = rotl32 ((c_25s ^ c_20s ^ w[14] ^ w[12]), 1u); - const u32 c_29s = rotl32 ((c_26s ^ c_21s ^ w[15] ^ w[13]), 1u); - const u32 c_30s = rotl32 ((c_27s ^ c_22s ^ c_16s ^ w[14]), 1u); - const u32 c_31s = rotl32 ((c_28s ^ c_23s ^ c_17s ^ w[15]), 1u); - const u32 c_32s = rotl32 ((c_29s ^ c_24s ^ c_18s ^ c_16s), 1u); - const u32 c_33s = rotl32 ((c_30s ^ c_25s ^ c_19s ^ c_17s), 1u); - const u32 c_34s = rotl32 ((c_31s ^ c_26s ^ c_20s ^ c_18s), 1u); - const u32 c_35s = rotl32 ((c_32s ^ c_27s ^ c_21s ^ c_19s), 1u); - const u32 c_36s = rotl32 ((c_33s ^ c_28s ^ c_22s ^ c_20s), 1u); - const u32 c_37s = rotl32 ((c_34s ^ c_29s 
^ c_23s ^ c_21s), 1u); - const u32 c_38s = rotl32 ((c_35s ^ c_30s ^ c_24s ^ c_22s), 1u); - const u32 c_39s = rotl32 ((c_36s ^ c_31s ^ c_25s ^ c_23s), 1u); - const u32 c_40s = rotl32 ((c_37s ^ c_32s ^ c_26s ^ c_24s), 1u); - const u32 c_41s = rotl32 ((c_38s ^ c_33s ^ c_27s ^ c_25s), 1u); - const u32 c_42s = rotl32 ((c_39s ^ c_34s ^ c_28s ^ c_26s), 1u); - const u32 c_43s = rotl32 ((c_40s ^ c_35s ^ c_29s ^ c_27s), 1u); - const u32 c_44s = rotl32 ((c_41s ^ c_36s ^ c_30s ^ c_28s), 1u); - const u32 c_45s = rotl32 ((c_42s ^ c_37s ^ c_31s ^ c_29s), 1u); - const u32 c_46s = rotl32 ((c_43s ^ c_38s ^ c_32s ^ c_30s), 1u); - const u32 c_47s = rotl32 ((c_44s ^ c_39s ^ c_33s ^ c_31s), 1u); - const u32 c_48s = rotl32 ((c_45s ^ c_40s ^ c_34s ^ c_32s), 1u); - const u32 c_49s = rotl32 ((c_46s ^ c_41s ^ c_35s ^ c_33s), 1u); - const u32 c_50s = rotl32 ((c_47s ^ c_42s ^ c_36s ^ c_34s), 1u); - const u32 c_51s = rotl32 ((c_48s ^ c_43s ^ c_37s ^ c_35s), 1u); - const u32 c_52s = rotl32 ((c_49s ^ c_44s ^ c_38s ^ c_36s), 1u); - const u32 c_53s = rotl32 ((c_50s ^ c_45s ^ c_39s ^ c_37s), 1u); - const u32 c_54s = rotl32 ((c_51s ^ c_46s ^ c_40s ^ c_38s), 1u); - const u32 c_55s = rotl32 ((c_52s ^ c_47s ^ c_41s ^ c_39s), 1u); - const u32 c_56s = rotl32 ((c_53s ^ c_48s ^ c_42s ^ c_40s), 1u); - const u32 c_57s = rotl32 ((c_54s ^ c_49s ^ c_43s ^ c_41s), 1u); - const u32 c_58s = rotl32 ((c_55s ^ c_50s ^ c_44s ^ c_42s), 1u); - const u32 c_59s = rotl32 ((c_56s ^ c_51s ^ c_45s ^ c_43s), 1u); - const u32 c_60s = rotl32 ((c_57s ^ c_52s ^ c_46s ^ c_44s), 1u); - const u32 c_61s = rotl32 ((c_58s ^ c_53s ^ c_47s ^ c_45s), 1u); - const u32 c_62s = rotl32 ((c_59s ^ c_54s ^ c_48s ^ c_46s), 1u); - const u32 c_63s = rotl32 ((c_60s ^ c_55s ^ c_49s ^ c_47s), 1u); - const u32 c_64s = rotl32 ((c_61s ^ c_56s ^ c_50s ^ c_48s), 1u); - const u32 c_65s = rotl32 ((c_62s ^ c_57s ^ c_51s ^ c_49s), 1u); - const u32 c_66s = rotl32 ((c_63s ^ c_58s ^ c_52s ^ c_50s), 1u); - const u32 c_67s = rotl32 ((c_64s ^ c_59s ^ c_53s ^ c_51s), 
1u); - const u32 c_68s = rotl32 ((c_65s ^ c_60s ^ c_54s ^ c_52s), 1u); - const u32 c_69s = rotl32 ((c_66s ^ c_61s ^ c_55s ^ c_53s), 1u); - const u32 c_70s = rotl32 ((c_67s ^ c_62s ^ c_56s ^ c_54s), 1u); - const u32 c_71s = rotl32 ((c_68s ^ c_63s ^ c_57s ^ c_55s), 1u); - const u32 c_72s = rotl32 ((c_69s ^ c_64s ^ c_58s ^ c_56s), 1u); - const u32 c_73s = rotl32 ((c_70s ^ c_65s ^ c_59s ^ c_57s), 1u); - const u32 c_74s = rotl32 ((c_71s ^ c_66s ^ c_60s ^ c_58s), 1u); - const u32 c_75s = rotl32 ((c_72s ^ c_67s ^ c_61s ^ c_59s), 1u); - - const u32 c_17sK = c_17s + SHA1C00; - const u32 c_18sK = c_18s + SHA1C00; - const u32 c_20sK = c_20s + SHA1C01; - const u32 c_21sK = c_21s + SHA1C01; - const u32 c_23sK = c_23s + SHA1C01; - const u32 c_26sK = c_26s + SHA1C01; - const u32 c_27sK = c_27s + SHA1C01; - const u32 c_29sK = c_29s + SHA1C01; - const u32 c_33sK = c_33s + SHA1C01; - const u32 c_39sK = c_39s + SHA1C01; - const u32 c_41sK = c_41s + SHA1C02; - const u32 c_45sK = c_45s + SHA1C02; - const u32 c_53sK = c_53s + SHA1C02; - const u32 c_65sK = c_65s + SHA1C03; - const u32 c_69sK = c_69s + SHA1C03; - - const u32 w1 = w[ 1] + SHA1C00; - const u32 w2 = w[ 2] + SHA1C00; - const u32 w3 = w[ 3] + SHA1C00; - const u32 w4 = w[ 4] + SHA1C00; - const u32 w5 = w[ 5] + SHA1C00; - const u32 w6 = w[ 6] + SHA1C00; - const u32 w7 = w[ 7] + SHA1C00; - const u32 w8 = w[ 8] + SHA1C00; - const u32 w9 = w[ 9] + SHA1C00; - const u32 wa = w[10] + SHA1C00; - const u32 wb = w[11] + SHA1C00; - const u32 wc = w[12] + SHA1C00; - const u32 wd = w[13] + SHA1C00; - const u32 we = w[14] + SHA1C00; - const u32 wf = w[15] + SHA1C00; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - const u32 bf_loops = 
ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - const u32x w0s01 = rotl32 (w0, 1u); - const u32x w0s02 = rotl32 (w0, 2u); - const u32x w0s03 = rotl32 (w0, 3u); - const u32x w0s04 = rotl32 (w0, 4u); - const u32x w0s05 = rotl32 (w0, 5u); - const u32x w0s06 = rotl32 (w0, 6u); - const u32x w0s07 = rotl32 (w0, 7u); - const u32x w0s08 = rotl32 (w0, 8u); - const u32x w0s09 = rotl32 (w0, 9u); - const u32x w0s10 = rotl32 (w0, 10u); - const u32x w0s11 = rotl32 (w0, 11u); - const u32x w0s12 = rotl32 (w0, 12u); - const u32x w0s13 = rotl32 (w0, 13u); - const u32x w0s14 = rotl32 (w0, 14u); - const u32x w0s15 = rotl32 (w0, 15u); - const u32x w0s16 = rotl32 (w0, 16u); - const u32x w0s17 = rotl32 (w0, 17u); - const u32x w0s18 = rotl32 (w0, 18u); - const u32x w0s19 = rotl32 (w0, 19u); - const u32x w0s20 = rotl32 (w0, 20u); - - const u32x w0s04___w0s06 = w0s04 ^ w0s06; - const u32x w0s04___w0s08 = w0s04 ^ w0s08; - const u32x w0s08___w0s12 = w0s08 ^ w0s12; - const u32x w0s04___w0s06___w0s07 = w0s04___w0s06 ^ w0s07; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEPX(SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEPX(SHA1_F0o, b, c, d, e, a, we); - SHA1_STEPX(SHA1_F0o, a, b, c, d, e, wf); - - SHA1_STEP (SHA1_F0o, e, 
a, b, c, d, (c_16s ^ w0s01)); - SHA1_STEPX(SHA1_F0o, d, e, a, b, c, (c_17sK)); - SHA1_STEPX(SHA1_F0o, c, d, e, a, b, (c_18sK)); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, (c_19s ^ w0s02)); - - #undef K - #define K SHA1C01 - - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_20sK)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_21sK)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_22s ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_23sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_24s ^ w0s02)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_25s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , e, a, b, c, d, (c_26sK)); - SHA1_STEPX(SHA1_F1 , d, e, a, b, c, (c_27sK)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_28s ^ w0s05)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_29sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_30s ^ w0s02 ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_31s ^ w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_32s ^ w0s02 ^ w0s03)); - SHA1_STEPX(SHA1_F1 , c, d, e, a, b, (c_33sK)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_34s ^ w0s07)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_35s ^ w0s04)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_36s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_37s ^ w0s08)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_38s ^ w0s04)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_39sK)); - - #undef K - #define K SHA1C02 - - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_40s ^ w0s04 ^ w0s09)); - SHA1_STEPX(SHA1_F2o, e, a, b, c, d, (c_41sK)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_42s ^ w0s06 ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_43s ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_44s ^ w0s03 ^ w0s06 ^ w0s07)); - SHA1_STEPX(SHA1_F2o, a, b, c, d, e, (c_45sK)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_46s ^ w0s04 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_47s ^ w0s04___w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_48s ^ w0s03 ^ w0s04___w0s08 ^ w0s05 ^ w0s10)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_49s ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, 
d, e, (c_50s ^ w0s08)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_51s ^ w0s04___w0s06)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_52s ^ w0s04___w0s08 ^ w0s13)); - SHA1_STEPX(SHA1_F2o, c, d, e, a, b, (c_53sK)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_54s ^ w0s07 ^ w0s10 ^ w0s12)); - SHA1_STEP (SHA1_F2o, a, b, c, d, e, (c_55s ^ w0s14)); - SHA1_STEP (SHA1_F2o, e, a, b, c, d, (c_56s ^ w0s04___w0s06___w0s07 ^ w0s10 ^ w0s11)); - SHA1_STEP (SHA1_F2o, d, e, a, b, c, (c_57s ^ w0s08)); - SHA1_STEP (SHA1_F2o, c, d, e, a, b, (c_58s ^ w0s04___w0s08 ^ w0s15)); - SHA1_STEP (SHA1_F2o, b, c, d, e, a, (c_59s ^ w0s08___w0s12)); - - #undef K - #define K SHA1C03 - - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_60s ^ w0s04 ^ w0s08___w0s12 ^ w0s07 ^ w0s14)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_61s ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_62s ^ w0s04___w0s06 ^ w0s08___w0s12)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_63s ^ w0s08)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_64s ^ w0s04___w0s06___w0s07 ^ w0s08___w0s12 ^ w0s17)); - SHA1_STEPX(SHA1_F1 , a, b, c, d, e, (c_65sK)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_66s ^ w0s14 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_67s ^ w0s08 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_68s ^ w0s11 ^ w0s14 ^ w0s15)); - SHA1_STEPX(SHA1_F1 , b, c, d, e, a, (c_69sK)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_70s ^ w0s12 ^ w0s19)); - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_71s ^ w0s12 ^ w0s16)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_72s ^ w0s05 ^ w0s11 ^ w0s12 ^ w0s13 ^ w0s16 ^ w0s18)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_73s ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_74s ^ w0s08 ^ w0s16)); - SHA1_STEP (SHA1_F1 , a, b, c, d, e, (c_75s ^ w0s06 ^ w0s12 ^ w0s14)); - - const u32 c_76s = rotl32 ((c_73s ^ c_68s ^ c_62s ^ c_60s), 1u); - const u32 c_77s = rotl32 ((c_74s ^ c_69s ^ c_63s ^ c_61s), 1u); - const u32 c_78s = rotl32 ((c_75s ^ c_70s ^ c_64s ^ c_62s), 1u); - const u32 c_79s = rotl32 ((c_76s ^ c_71s ^ c_65s ^ 
c_63s), 1u); - - const u32x w0s21 = rotl32 (w0, 21u); - const u32x w0s22 = rotl32 (w0, 22U); - - SHA1_STEP (SHA1_F1 , e, a, b, c, d, (c_76s ^ w0s07 ^ w0s08___w0s12 ^ w0s16 ^ w0s21)); - SHA1_STEP (SHA1_F1 , d, e, a, b, c, (c_77s)); - SHA1_STEP (SHA1_F1 , c, d, e, a, b, (c_78s ^ w0s07 ^ w0s08 ^ w0s15 ^ w0s18 ^ w0s20)); - SHA1_STEP (SHA1_F1 , b, c, d, e, a, (c_79s ^ w0s08 ^ w0s22)); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x w0_t = a; - u32x w1_t = b; - u32x w2_t = c; - u32x w3_t = d; - u32x w4_t = e; - u32x w5_t = 0x80000000; - u32x w6_t = 0; - u32x w7_t = 0; - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - 
w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = 
rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - 
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - bool q_cond = (e_rev != e); - - if (q_cond) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, 
const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const 
u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 
3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = 
pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00300s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m00400.cu b/nv/m00400.cu deleted file mode 100644 index f2cb53c..0000000 --- a/nv/m00400.cu +++ /dev/null @@ -1,358 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE2 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, 
w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, 
MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, phpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = 0; - w2[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - /** - * init - */ - - u32 block_len = 8 + pw_len; - - u32x block0[4]; - - block0[0] = salt_buf[0]; - block0[1] = salt_buf[1]; - block0[2] = w0[0]; - block0[3] = w0[1]; - - u32x block1[4]; - - block1[0] = w0[2]; - block1[1] = w0[3]; - block1[2] = w1[0]; - block1[3] = w1[1]; - - u32x block2[4]; - - block2[0] = w1[2]; - block2[1] = w1[3]; - block2[2] = w2[0]; - block2[3] = w2[1]; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = block_len * 8; - block3[3] = 0; - - append_0x80_4 (block0, block1, block2, block3, block_len); - - /** - * init - */ - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00400_loop (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, phpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = 0; - w2[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - u32x digest[4]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - - /** - * loop - */ - - u32 block_len = (16 + pw_len); - - u32x block0[4]; - - block0[0] = 0; - block0[1] = 0; - block0[2] = 0; - block0[3] = 0; - - u32x block1[4]; - - block1[0] = w0[0]; - block1[1] = w0[1]; - block1[2] = w0[2]; - block1[3] = w0[3]; - - u32x block2[4]; - - block2[0] = w1[0]; - block2[1] = w1[1]; - block2[2] = w1[2]; - block2[3] = w1[3]; - - u32x block3[4]; - - block3[0] = w2[0]; - block3[1] = w2[1]; - block3[2] = block_len * 8; - block3[3] = 0; - - append_0x80_4 (block0, block1, block2, block3, block_len); - - /** - * init - */ - - for (u32 i = 
0; i < loop_cnt; i++) - { - block0[0] = digest[0]; - block0[1] = digest[1]; - block0[2] = digest[2]; - block0[3] = digest[3]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, phpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m00500.cu b/nv/m00500.cu deleted file mode 100644 index b582d90..0000000 --- a/nv/m00500.cu +++ /dev/null @@ -1,1174 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef 
VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE2 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define md5crypt_magic 0x00243124 - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, 
b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, 
MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 
| append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0x80, selector); 
- - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0x80; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - 
block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - break; - case 10: block2[2] |= tmp0; - block2[3] = tmp1; - block3[0] = tmp2; - break; - case 11: block2[3] |= tmp0; - block3[0] = tmp1; - block3[1] = 
tmp2; - break; - } - - return; -} - -__device__ static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) -{ - switch (block_len) - { - case 0: - block0[0] = md5crypt_magic; - break; - - case 1: - block0[0] = block0[0] | md5crypt_magic << 8; - block0[1] = md5crypt_magic >> 24; - break; - - case 2: - block0[0] = block0[0] | md5crypt_magic << 16; - block0[1] = md5crypt_magic >> 16; - break; - - case 3: - block0[0] = block0[0] | md5crypt_magic << 24; - block0[1] = md5crypt_magic >> 8; - break; - - case 4: - block0[1] = md5crypt_magic; - break; - - case 5: - block0[1] = block0[1] | md5crypt_magic << 8; - block0[2] = md5crypt_magic >> 24; - break; - - case 6: - block0[1] = block0[1] | md5crypt_magic << 16; - block0[2] = md5crypt_magic >> 16; - break; - - case 7: - block0[1] = block0[1] | md5crypt_magic << 24; - block0[2] = md5crypt_magic >> 8; - break; - - case 8: - block0[2] = md5crypt_magic; - break; - - case 9: - block0[2] = block0[2] | md5crypt_magic << 8; - block0[3] = md5crypt_magic >> 24; - break; - - case 10: - block0[2] = block0[2] | md5crypt_magic << 16; - block0[3] = md5crypt_magic >> 16; - break; - - case 11: - block0[2] = block0[2] | md5crypt_magic << 24; - block0[3] = md5crypt_magic >> 8; - break; - - case 12: - block0[3] = md5crypt_magic; - break; - - case 13: - block0[3] = block0[3] | md5crypt_magic << 8; - block1[0] = md5crypt_magic >> 24; - break; - - case 14: - block0[3] = block0[3] | md5crypt_magic << 16; - block1[0] = md5crypt_magic >> 16; - break; - - case 15: - block0[3] = block0[3] | md5crypt_magic << 24; - block1[0] = md5crypt_magic >> 8; - break; - } -} - -__device__ static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) -{ - switch (block_len) - { - case 0: - block0[0] = append; - break; - - case 1: - block0[0] = block0[0] | append << 8; - break; - - case 2: - block0[0] = block0[0] | append << 16; - break; - - case 3: - block0[0] = block0[0] | append 
<< 24; - break; - - case 4: - block0[1] = append; - break; - - case 5: - block0[1] = block0[1] | append << 8; - break; - - case 6: - block0[1] = block0[1] | append << 16; - break; - - case 7: - block0[1] = block0[1] | append << 24; - break; - - case 8: - block0[2] = append; - break; - - case 9: - block0[2] = block0[2] | append << 8; - break; - - case 10: - block0[2] = block0[2] | append << 16; - break; - - case 11: - block0[2] = block0[2] | append << 24; - break; - - case 12: - block0[3] = append; - break; - - case 13: - block0[3] = block0[3] | append << 8; - break; - - case 14: - block0[3] = block0[3] | append << 16; - break; - - case 15: - block0[3] = block0[3] | append << 24; - break; - - case 16: - block1[0] = append; - break; - - case 17: - block1[0] = block1[0] | append << 8; - break; - - case 18: - block1[0] = block1[0] | append << 16; - break; - - case 19: - block1[0] = block1[0] | append << 24; - break; - - case 20: - block1[1] = append; - break; - - case 21: - block1[1] = block1[1] | append << 8; - break; - - case 22: - block1[1] = block1[1] | append << 16; - break; - - case 23: - block1[1] = block1[1] | append << 24; - break; - - case 24: - block1[2] = append; - break; - - case 25: - block1[2] = block1[2] | append << 8; - break; - - case 26: - block1[2] = block1[2] | append << 16; - break; - - case 27: - block1[2] = block1[2] | append << 24; - break; - - case 28: - block1[3] = append; - break; - - case 29: - block1[3] = block1[3] | append << 8; - break; - - case 30: - block1[3] = block1[3] | append << 16; - break; - - case 31: - block1[3] = block1[3] | append << 24; - break; - - case 32: - block2[0] = append; - break; - - case 33: - block2[0] = block2[0] | append << 8; - break; - - case 34: - block2[0] = block2[0] | append << 16; - break; - - case 35: - block2[0] = block2[0] | append << 24; - break; - - case 36: - block2[1] = append; - break; - - case 37: - block2[1] = block2[1] | append << 8; - break; - - case 38: - block2[1] = block2[1] | append << 16; 
- break; - - case 39: - block2[1] = block2[1] | append << 24; - break; - - case 40: - block2[2] = append; - break; - - case 41: - block2[2] = block2[2] | append << 8; - break; - - case 42: - block2[2] = block2[2] | append << 16; - break; - - case 43: - block2[2] = block2[2] | append << 24; - break; - - case 44: - block2[3] = append; - break; - - case 45: - block2[3] = block2[3] | append << 8; - break; - - case 46: - block2[3] = block2[3] | append << 16; - break; - - case 47: - block2[3] = block2[3] | append << 24; - break; - - case 48: - block3[0] = append; - break; - - case 49: - block3[0] = block3[0] | append << 8; - break; - - case 50: - block3[0] = block3[0] | append << 16; - break; - - case 51: - block3[0] = block3[0] | append << 24; - break; - - case 52: - block3[1] = append; - break; - - case 53: - block3[1] = block3[1] | append << 8; - break; - - case 54: - block3[1] = block3[1] | append << 16; - break; - - case 55: - block3[1] = block3[1] | append << 24; - break; - - case 56: - block3[2] = append; - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = 
pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * init - */ - - //memcat16 (block0, block1, block2, block3, block_len, w0); - //block_len += pw_len; - - u32 block_len = pw_len; - - u32x block0[4]; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - /* The password first, since that is what is most unknown */ - /* Then our magic string */ - /* Then the raw salt */ - /* Then just as many characters of the MD5(pw,salt,pw) */ - - //memcat16 (block0, block1, block2, block3, block_len, w); - //block_len += pw_len; - - block_len = pw_len; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - append_sign (block0, block1, block_len); - - block_len += 3; - - memcat8 (block0, block1, block2, block3, block_len, 
salt_buf); - - block_len += salt_len; - - truncate_block (digest, pw_len); - - memcat16 (block0, block1, block2, block3, block_len, digest); - - block_len += pw_len; - - /* Then something really weird... */ - - u32x append = block0[0] & 0xFF; - - for (u32 j = pw_len; j; j >>= 1) - { - if ((j & 1) == 0) - { - append_1st (block0, block1, block2, block3, block_len, append); - } - - block_len++; - } - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0_x80[4]; - - w0_x80[0] = w0[0]; - w0_x80[1] = w0[1]; - w0_x80[2] = w0[2]; - 
w0_x80[3] = w0[3]; - - append_0x80_1 (w0_x80, pw_len); - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - u32x digest[4]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - - /** - * loop - */ - - /* and now, just to make sure things don't run too fast */ - - u32 block_len; - - u32x block0[4]; - - block0[0] = 0; - block0[1] = 0; - block0[2] = 0; - block0[3] = 0; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - block3[0] = 0; - block3[1] = 0; - - const u32 j1 = (j & 1) ? 1 : 0; - const u32 j3 = (j % 3) ? 1 : 0; - const u32 j7 = (j % 7) ? 
1 : 0; - - if (j1) - { - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block_len = pw_len; - - if (j3) - { - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - } - - if (j7) - { - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - - memcat16_x80 (block0, block1, block2, block3, block_len, digest); - - block_len += 16; - } - else - { - block0[0] = digest[0]; - block0[1] = digest[1]; - block0[2] = digest[2]; - block0[3] = digest[3]; - - block_len = 16; - - if (j3 && j7) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - else if (j3) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - } - else if (j7) - { - block1[0] = w0[0]; - block1[1] = w0[1]; - block1[2] = w0[2]; - block1[3] = w0[3]; - - block_len += pw_len; - } - - memcat16 (block0, block1, block2, block3, block_len, w0_x80); - - block_len += pw_len; - } - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m00900_a0.cu b/nv/m00900_a0.cu deleted file mode 100644 index d22f016..0000000 --- a/nv/m00900_a0.cu +++ /dev/null @@ -1,347 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, 
b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - 
MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; 
- w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); 
- MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const 
u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00900_a1.cu b/nv/m00900_a1.cu deleted file mode 100644 index f568544..0000000 --- a/nv/m00900_a1.cu +++ /dev/null @@ -1,449 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m00900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - 
u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], 
MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m00900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = 
c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, 
w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m00900_a3.cu b/nv/m00900_a3.cu deleted file mode 100644 index 19b3193..0000000 --- a/nv/m00900_a3.cu +++ /dev/null @@ -1,630 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include 
"include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define MD4_STEP_REV(f,a,b,c,d,x,t,s) \ -{ \ - a = rotr32 (a, s); \ - a -= f (b, c, d); \ - a -= x; \ - a -= t; \ -} - -#define MD4_STEP_REV1(f,a,b,c,d,x,t,s) \ -{ \ - a = rotr32 (a, s); \ - a -= x; \ - a -= t; \ -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m00900m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const 
u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; - const u32 H_wfc02 = w[15] + MD4C02; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, 
F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H , 
b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m00900s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 
G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; - const u32 H_wfc02 = w[15] + MD4C02; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23); - MD4_STEP_REV 
(MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20); - - const u32x sav_c = c_rev; - const u32x sav_d = d_rev; - - MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13); - MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12); - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x pre_a = a_rev; - u32x pre_b = b_rev; - u32x pre_c = c_rev; - - pre_a = pre_a - w0; - pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a); - pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b); - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, 
F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - - bool q_cond = (pre_c != c); - - if (q_cond) continue; - - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23); - - const 
u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - 
/** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m00900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; 
- w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m00900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01000_a0.cu b/nv/m01000_a0.cu deleted file mode 100644 index 54f1e20..0000000 --- a/nv/m01000_a0.cu +++ /dev/null @@ -1,367 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, 
const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - 
MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, 
a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - 
MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s08 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01000_a1.cu b/nv/m01000_a1.cu deleted file mode 100644 index 4bd32dc..0000000 --- a/nv/m01000_a1.cu +++ /dev/null @@ -1,473 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define 
DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; 
- wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], 
MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21); - 
MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - 
switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, 
w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[2], MD4C02, 
MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H1, a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H2, d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H1, c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H2, b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01000_a3.cu b/nv/m01000_a3.cu deleted file mode 100644 index d28e4a2..0000000 --- a/nv/m01000_a3.cu +++ /dev/null @@ -1,634 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define MD4_STEP_REV(f,a,b,c,d,x,t,s) \ -{ \ - a = rotr32 (a, s); \ - a -= f (b, c, d); \ - a -= x; \ - a -= t; \ -} - -#define MD4_STEP_REV1(f,a,b,c,d,x,t,s) \ -{ \ - a = rotr32 (a, s); \ - a -= x; \ - a -= t; \ -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m01000m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, 
const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = 
w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; - const u32 H_wfc02 = w[15] + MD4C02; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, 
G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wcc02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_wac02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01000s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; 
- const u32 H_wfc02 = w[15] + MD4C02; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - u32 a_rev = digests_buf[digests_offset].digest_buf[0]; - u32 b_rev = digests_buf[digests_offset].digest_buf[1]; - u32 c_rev = digests_buf[digests_offset].digest_buf[2]; - u32 d_rev = digests_buf[digests_offset].digest_buf[3]; - - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[15], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 7], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[11], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 3], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[13], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 5], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 9], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 1], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[14], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 6], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[10], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, w[ 2], MD4C02, MD4S20); - MD4_STEP_REV (MD4_H, b_rev, c_rev, d_rev, a_rev, w[12], MD4C02, MD4S23); - MD4_STEP_REV (MD4_H, c_rev, d_rev, a_rev, b_rev, w[ 4], MD4C02, MD4S22); - MD4_STEP_REV (MD4_H, d_rev, a_rev, b_rev, c_rev, w[ 8], MD4C02, MD4S21); - MD4_STEP_REV (MD4_H, a_rev, b_rev, c_rev, d_rev, 0, MD4C02, MD4S20); - - const u32x sav_c = c_rev; - const u32x sav_d = d_rev; - - MD4_STEP_REV1(MD4_G, b_rev, c_rev, d_rev, a_rev, w[15], MD4C01, MD4S13); - MD4_STEP_REV1(MD4_G, c_rev, d_rev, a_rev, b_rev, w[11], MD4C01, MD4S12); - - /** - * loop - */ - - const u32 
bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x pre_a = a_rev; - u32x pre_b = b_rev; - u32x pre_c = c_rev; - - pre_a = pre_a - w0; - pre_b = pre_b - MD4_G (sav_c, sav_d, pre_a); - pre_c = pre_c - MD4_G (sav_d, pre_a, pre_b); - - u32x tmp2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - - bool q_cond = (pre_c != c); - - if (q_cond) continue; - - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - 
MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H1, a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wcc02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_wac02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H1, a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H2, d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H1, c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H2, b, c, d, a, H_wfc02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= 
gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - 
w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - 
m01000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01000_s16 (const pw_t *pws, 
const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01100_a0.cu b/nv/m01100_a0.cu deleted file mode 100644 index 4c93f4b..0000000 --- a/nv/m01100_a0.cu +++ /dev/null @@ -1,578 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ 
- -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - 
pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - 
MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, 
c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, 
d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = 
pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], 
MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, 
w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, 
MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, 
const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01100_a1.cu b/nv/m01100_a1.cu deleted file mode 100644 index 048b274..0000000 --- a/nv/m01100_a1.cu +++ /dev/null @@ -1,684 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, 
const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < 
combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], 
MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - 
MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); 
- MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, 
const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos 
< combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], 
MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - 
MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); 
- MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01100_a3.cu b/nv/m01100_a3.cu deleted file mode 100644 index c6d2be4..0000000 --- a/nv/m01100_a3.cu +++ /dev/null @@ -1,727 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m01100m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len 
= salt_bufs[salt_pos].salt_len; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; - const u32 H_wfc02 = w[15] + MD4C02; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; 
il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23); - 
MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, 
w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const 
u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01100s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD4C00; - const u32 F_w1c00 = w[ 1] + MD4C00; - const u32 F_w2c00 = w[ 2] + MD4C00; - const u32 F_w3c00 = w[ 3] + MD4C00; - const u32 F_w4c00 = w[ 4] + MD4C00; - const u32 F_w5c00 = w[ 5] + MD4C00; - const u32 F_w6c00 = w[ 6] + MD4C00; - const u32 F_w7c00 = w[ 7] + MD4C00; - const 
u32 F_w8c00 = w[ 8] + MD4C00; - const u32 F_w9c00 = w[ 9] + MD4C00; - const u32 F_wac00 = w[10] + MD4C00; - const u32 F_wbc00 = w[11] + MD4C00; - const u32 F_wcc00 = w[12] + MD4C00; - const u32 F_wdc00 = w[13] + MD4C00; - const u32 F_wec00 = w[14] + MD4C00; - const u32 F_wfc00 = w[15] + MD4C00; - - const u32 G_w0c01 = 0 + MD4C01; - const u32 G_w4c01 = w[ 4] + MD4C01; - const u32 G_w8c01 = w[ 8] + MD4C01; - const u32 G_wcc01 = w[12] + MD4C01; - const u32 G_w1c01 = w[ 1] + MD4C01; - const u32 G_w5c01 = w[ 5] + MD4C01; - const u32 G_w9c01 = w[ 9] + MD4C01; - const u32 G_wdc01 = w[13] + MD4C01; - const u32 G_w2c01 = w[ 2] + MD4C01; - const u32 G_w6c01 = w[ 6] + MD4C01; - const u32 G_wac01 = w[10] + MD4C01; - const u32 G_wec01 = w[14] + MD4C01; - const u32 G_w3c01 = w[ 3] + MD4C01; - const u32 G_w7c01 = w[ 7] + MD4C01; - const u32 G_wbc01 = w[11] + MD4C01; - const u32 G_wfc01 = w[15] + MD4C01; - - const u32 H_w0c02 = 0 + MD4C02; - const u32 H_w8c02 = w[ 8] + MD4C02; - const u32 H_w4c02 = w[ 4] + MD4C02; - const u32 H_wcc02 = w[12] + MD4C02; - const u32 H_w2c02 = w[ 2] + MD4C02; - const u32 H_wac02 = w[10] + MD4C02; - const u32 H_w6c02 = w[ 6] + MD4C02; - const u32 H_wec02 = w[14] + MD4C02; - const u32 H_w1c02 = w[ 1] + MD4C02; - const u32 H_w9c02 = w[ 9] + MD4C02; - const u32 H_w5c02 = w[ 5] + MD4C02; - const u32 H_wdc02 = w[13] + MD4C02; - const u32 H_w3c02 = w[ 3] + MD4C02; - const u32 H_wbc02 = w[11] + MD4C02; - const u32 H_w7c02 = w[ 7] + MD4C02; - const u32 H_wfc02 = w[15] + MD4C02; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - 
u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0, F_w0c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w1c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w2c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w3c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w4c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w5c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_w6c00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_w7c00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_w8c00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_w9c00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wac00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wbc00, MD4S03); - MD4_STEP0(MD4_Fo, a, b, c, d, F_wcc00, MD4S00); - MD4_STEP0(MD4_Fo, d, a, b, c, F_wdc00, MD4S01); - MD4_STEP0(MD4_Fo, c, d, a, b, F_wec00, MD4S02); - MD4_STEP0(MD4_Fo, b, c, d, a, F_wfc00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0, G_w0c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w4c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w8c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wcc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w1c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w5c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_w9c01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wdc01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w2c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w6c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wac01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wec01, MD4S13); - MD4_STEP0(MD4_Go, a, b, c, d, G_w3c01, MD4S10); - MD4_STEP0(MD4_Go, d, a, b, c, G_w7c01, MD4S11); - MD4_STEP0(MD4_Go, c, d, a, b, G_wbc01, MD4S12); - MD4_STEP0(MD4_Go, b, c, d, a, G_wfc01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0, H_w0c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w8c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w4c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wcc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w2c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wac02, MD4S21); - 
MD4_STEP0(MD4_H , c, d, a, b, H_w6c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wec02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w1c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_w9c02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w5c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wdc02, MD4S23); - MD4_STEP0(MD4_H , a, b, c, d, H_w3c02, MD4S20); - MD4_STEP0(MD4_H , d, a, b, c, H_wbc02, MD4S21); - MD4_STEP0(MD4_H , c, d, a, b, H_w7c02, MD4S22); - MD4_STEP0(MD4_H , b, c, d, a, H_wfc02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = a; - w0_t[1] = b; - w0_t[2] = c; - w0_t[3] = d; - w1_t[0] = salt_buf0[0]; - w1_t[1] = salt_buf0[1]; - w1_t[2] = salt_buf0[2]; - w1_t[3] = salt_buf0[3]; - w2_t[0] = salt_buf1[0]; - w2_t[1] = salt_buf1[1]; - w2_t[2] = salt_buf1[2]; - w2_t[3] = salt_buf1[3]; - w3_t[0] = salt_buf2[0]; - w3_t[1] = salt_buf2[1]; - w3_t[2] = (16 + salt_len) * 8; - w3_t[3] = 0; - - a = MD4M_A; - b = MD4M_B; - c = MD4M_C; - d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP 
(MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - 
- #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const 
comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01400_a0.cu b/nv/m01400_a0.cu deleted file mode 100644 index 2e6f5c5..0000000 --- a/nv/m01400_a0.cu +++ /dev/null @@ -1,429 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const 
u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x 
w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, 
SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = 
SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m08 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = 
swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = 0; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, 
SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01400_a1.cu b/nv/m01400_a1.cu deleted file mode 100644 index dcdc544..0000000 --- a/nv/m01400_a1.cu +++ /dev/null @@ -1,527 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - 
-#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; 
- wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = 
swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, 
f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); 
- - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = 
c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = 
SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND 
(w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, 
b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01400_a3.cu b/nv/m01400_a3.cu deleted file mode 100644 index 0709f07..0000000 --- a/nv/m01400_a3.cu +++ /dev/null @@ -1,538 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" 
-#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m01400m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP 
(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND 
(w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, 
b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01400s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3], - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, 
c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND 
(w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, 
f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 
w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] 
= 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400m (w, 
pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01400_s16 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01410_a0.cu b/nv/m01410_a0.cu deleted file mode 100644 index f2ae603..0000000 --- a/nv/m01410_a0.cu +++ /dev/null @@ -1,581 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - 
-#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - 
u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 out_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] 
|= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, 
we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, 
SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const 
u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - 
const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 out_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] |= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); 
- u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = 
SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m01410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01410_a1.cu b/nv/m01410_a1.cu deleted file mode 100644 index 1800098..0000000 --- a/nv/m01410_a1.cu +++ /dev/null @@ -1,635 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 
-#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 
0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 
0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = wordl3[2] | wordr3[2] | s3[2]; - w3[3] = wordl3[3] | wordr3[3] | s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, 
SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, 
SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = 
SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - 
wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] 
= 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = wordl3[2] | wordr3[2] | s3[2]; - w3[3] = wordl3[3] | wordr3[3] | s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = 
swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = 
SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const 
void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01410_a3.cu b/nv/m01410_a3.cu deleted file mode 100644 index 1ae083c..0000000 --- a/nv/m01410_a3.cu +++ /dev/null @@ -1,595 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include 
"include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m01410m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 
salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, 
SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND 
(w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, 
a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01410s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = 
SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - 
w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - 
*/ - - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, 
digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01420_a0.cu b/nv/m01420_a0.cu deleted file mode 100644 index e256794..0000000 --- a/nv/m01420_a0.cu +++ /dev/null @@ -1,503 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 
-#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = 
pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround 
(w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, 
w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, 
w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - 
w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, 
SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, 
SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01420_a1.cu b/nv/m01420_a1.cu deleted file mode 100644 index 0d8bda5..0000000 --- a/nv/m01420_a1.cu +++ /dev/null @@ -1,585 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - 
u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround 
(w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, 
SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = 
swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = 
SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, 
c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, 
w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, 
we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01420_a3.cu b/nv/m01420_a3.cu deleted file mode 
100644 index 3bd4a05..0000000 --- a/nv/m01420_a3.cu +++ /dev/null @@ -1,757 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m01420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 
salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - w0_t2[0] = swap_workaround (w0[0]); - w0_t2[1] = swap_workaround (w0[1]); - w0_t2[2] = swap_workaround (w0[2]); - w0_t2[3] = swap_workaround (w0[3]); - w1_t2[0] = swap_workaround (w1[0]); - w1_t2[1] = swap_workaround (w1[1]); - w1_t2[2] = swap_workaround (w1[2]); - w1_t2[3] = swap_workaround (w1[3]); - w2_t2[0] = swap_workaround (w2[0]); - w2_t2[1] = swap_workaround (w2[1]); - w2_t2[2] = swap_workaround (w2[2]); - w2_t2[3] = swap_workaround (w2[3]); - w3_t2[0] = swap_workaround (w3[0]); - w3_t2[1] = swap_workaround (w3[1]); - w3_t2[2] = swap_workaround (w3[2]); - w3_t2[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - w2_t2[0] |= salt_buf2[0]; - w2_t2[1] |= salt_buf2[1]; - w2_t2[2] |= salt_buf2[2]; - w2_t2[3] |= salt_buf2[3]; - w3_t2[0] |= salt_buf3[0]; - w3_t2[1] |= 
salt_buf3[1]; - w3_t2[2] |= salt_buf3[2]; - w3_t2[3] |= salt_buf3[3]; - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, 
SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, 
SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = 
SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = 
salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - w0_t2[0] = swap_workaround (w0[0]); - w0_t2[1] = swap_workaround (w0[1]); - w0_t2[2] = swap_workaround (w0[2]); - w0_t2[3] = swap_workaround (w0[3]); - w1_t2[0] = swap_workaround (w1[0]); - w1_t2[1] = swap_workaround (w1[1]); - w1_t2[2] = swap_workaround (w1[2]); - w1_t2[3] = swap_workaround (w1[3]); - w2_t2[0] = swap_workaround (w2[0]); - w2_t2[1] = swap_workaround (w2[1]); - w2_t2[2] = swap_workaround (w2[2]); - w2_t2[3] = swap_workaround (w2[3]); - w3_t2[0] = swap_workaround (w3[0]); - w3_t2[1] = swap_workaround (w3[1]); - w3_t2[2] = swap_workaround (w3[2]); - w3_t2[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - w2_t2[0] |= salt_buf2[0]; - w2_t2[1] |= salt_buf2[1]; - w2_t2[2] |= salt_buf2[2]; - w2_t2[3] |= salt_buf2[3]; - w3_t2[0] |= salt_buf3[0]; - w3_t2[1] |= salt_buf3[1]; - w3_t2[2] |= salt_buf3[2]; - w3_t2[3] |= salt_buf3[3]; - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - 
u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, 
g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, 
SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_m16 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01430_a0.cu b/nv/m01430_a0.cu deleted file mode 100644 index bb87313..0000000 --- a/nv/m01430_a0.cu +++ /dev/null @@ -1,591 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] 
= salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - w0_t2[0] |= s0[0]; - w0_t2[1] |= s0[1]; - w0_t2[2] |= s0[2]; - w0_t2[3] |= s0[3]; - w1_t2[0] |= s1[0]; - w1_t2[1] |= s1[1]; - w1_t2[2] |= s1[2]; - w1_t2[3] |= s1[3]; - w2_t2[0] |= s2[0]; - w2_t2[1] |= s2[1]; - w2_t2[2] |= s2[2]; - w2_t2[3] |= s2[3]; - w3_t2[0] |= s3[0]; - w3_t2[1] |= s3[1]; - w3_t2[2] |= s3[2]; - w3_t2[3] |= s3[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x 
w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); 
- - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND 
(wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - w0_t2[0] |= s0[0]; - w0_t2[1] |= s0[1]; - w0_t2[2] |= s0[2]; - w0_t2[3] |= s0[3]; - w1_t2[0] |= s1[0]; - w1_t2[1] |= s1[1]; - w1_t2[2] |= s1[2]; - w1_t2[3] |= s1[3]; - w2_t2[0] |= s2[0]; - w2_t2[1] |= s2[1]; - w2_t2[2] |= s2[2]; - w2_t2[3] |= s2[3]; - w3_t2[0] |= s3[0]; - w3_t2[1] |= s3[1]; - w3_t2[2] |= s3[2]; - w3_t2[3] |= s3[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = 
swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, 
g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, 
SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; 
- const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01430_a1.cu b/nv/m01430_a1.cu deleted file mode 100644 index 92caed4..0000000 --- a/nv/m01430_a1.cu +++ /dev/null @@ -1,673 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include 
"include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = 
pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = 
wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - w0_t2[0] |= s0[0]; - w0_t2[1] |= s0[1]; - w0_t2[2] |= s0[2]; - w0_t2[3] |= s0[3]; - w1_t2[0] |= s1[0]; - w1_t2[1] |= s1[1]; - w1_t2[2] |= s1[2]; - w1_t2[3] |= s1[3]; - w2_t2[0] |= s2[0]; - w2_t2[1] |= s2[1]; - w2_t2[2] |= s2[2]; - w2_t2[3] |= s2[3]; - w3_t2[0] |= s3[0]; - w3_t2[1] |= s3[1]; - w3_t2[2] |= s3[2]; - w3_t2[3] |= s3[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - 
u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = 
SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, 
w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - 
const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - w0_t2[0] |= s0[0]; - w0_t2[1] |= s0[1]; - w0_t2[2] |= s0[2]; - w0_t2[3] |= s0[3]; - w1_t2[0] |= 
s1[0]; - w1_t2[1] |= s1[1]; - w1_t2[2] |= s1[2]; - w1_t2[3] |= s1[3]; - w2_t2[0] |= s2[0]; - w2_t2[1] |= s2[1]; - w2_t2[2] |= s2[2]; - w2_t2[3] |= s2[3]; - w3_t2[0] |= s3[0]; - w3_t2[1] |= s3[1]; - w3_t2[2] |= s3[2]; - w3_t2[3] |= s3[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, 
b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND 
(wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, 
c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01430_a3.cu b/nv/m01430_a3.cu deleted file mode 100644 index 13fac01..0000000 --- a/nv/m01430_a3.cu +++ /dev/null @@ -1,595 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m01430m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** - * loop - */ - - const u32 
bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, 
SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = 
SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01430s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t = w0; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - 
u32x wf_t = w[15]; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, 
SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = 
SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - 
w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01430_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = 
pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01430s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01440_a0.cu b/nv/m01440_a0.cu deleted file mode 100644 index 68d6f92..0000000 --- a/nv/m01440_a0.cu +++ /dev/null @@ -1,507 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 
out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, 
f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, 
wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, 
w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 
salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, out_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t 
= swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = out_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND 
(we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, 
g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, 
SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01440_a1.cu b/nv/m01440_a1.cu deleted file mode 100644 
index 7b2badd..0000000 --- a/nv/m01440_a1.cu +++ /dev/null @@ -1,601 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = 
pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = 
wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = 
SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, 
w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const 
salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - 
* modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = 
c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - make_unicode (w0, w0_t2, w1_t2); - make_unicode (w1, w2_t2, w3_t2); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t2, w1_t2, w2_t2, w3_t2, pw_salt_len); - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround 
(w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, 
w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, 
w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - 
-extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01440_a3.cu b/nv/m01440_a3.cu deleted file mode 100644 index bac6f9d..0000000 --- a/nv/m01440_a3.cu +++ /dev/null @@ -1,757 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 
-#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m01440m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - w0_t2[0] = swap_workaround (w0[0]); - w0_t2[1] = swap_workaround (w0[1]); - w0_t2[2] = swap_workaround (w0[2]); - w0_t2[3] = swap_workaround (w0[3]); - w1_t2[0] = swap_workaround (w1[0]); - w1_t2[1] = swap_workaround (w1[1]); - w1_t2[2] = swap_workaround (w1[2]); - w1_t2[3] = swap_workaround (w1[3]); - w2_t2[0] = swap_workaround (w2[0]); - w2_t2[1] = swap_workaround (w2[1]); - w2_t2[2] = swap_workaround (w2[2]); - w2_t2[3] = swap_workaround (w2[3]); - w3_t2[0] = swap_workaround (w3[0]); - w3_t2[1] = swap_workaround (w3[1]); - w3_t2[2] = swap_workaround (w3[2]); - w3_t2[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - w2_t2[0] |= salt_buf2[0]; - w2_t2[1] |= salt_buf2[1]; - w2_t2[2] |= salt_buf2[2]; - w2_t2[3] |= salt_buf2[3]; - w3_t2[0] |= salt_buf3[0]; - w3_t2[1] |= salt_buf3[1]; - w3_t2[2] |= salt_buf3[2]; - w3_t2[3] |= salt_buf3[3]; - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = 
swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = 
SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, 
c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, 
w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01440s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 
salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t2[4]; - u32x w1_t2[4]; - u32x w2_t2[4]; - u32x w3_t2[4]; - - w0_t2[0] = swap_workaround (w0[0]); - w0_t2[1] = swap_workaround (w0[1]); - w0_t2[2] = swap_workaround (w0[2]); - w0_t2[3] = swap_workaround (w0[3]); - w1_t2[0] = swap_workaround (w1[0]); - w1_t2[1] = swap_workaround (w1[1]); - w1_t2[2] = swap_workaround (w1[2]); - w1_t2[3] = swap_workaround (w1[3]); - w2_t2[0] = swap_workaround (w2[0]); - w2_t2[1] = swap_workaround (w2[1]); - w2_t2[2] = swap_workaround (w2[2]); - w2_t2[3] = swap_workaround (w2[3]); - w3_t2[0] = swap_workaround (w3[0]); - w3_t2[1] = swap_workaround (w3[1]); - w3_t2[2] = swap_workaround (w3[2]); - w3_t2[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t2, w1_t2, w2_t2, w3_t2, salt_len); - - w0_t2[0] |= salt_buf0[0]; - w0_t2[1] |= salt_buf0[1]; - w0_t2[2] |= salt_buf0[2]; - w0_t2[3] |= salt_buf0[3]; - w1_t2[0] |= salt_buf1[0]; - w1_t2[1] |= salt_buf1[1]; - w1_t2[2] |= salt_buf1[2]; - w1_t2[3] |= salt_buf1[3]; - w2_t2[0] |= salt_buf2[0]; - w2_t2[1] |= salt_buf2[1]; - w2_t2[2] |= salt_buf2[2]; - w2_t2[3] |= salt_buf2[3]; - w3_t2[0] |= salt_buf3[0]; - w3_t2[1] |= salt_buf3[1]; - w3_t2[2] |= salt_buf3[2]; - w3_t2[3] |= salt_buf3[3]; - - /** - * sha256 - */ - - u32x w0_t = swap_workaround (w0_t2[0]); - u32x w1_t = swap_workaround (w0_t2[1]); - u32x w2_t = swap_workaround (w0_t2[2]); - u32x w3_t = swap_workaround (w0_t2[3]); - u32x w4_t = swap_workaround (w1_t2[0]); - u32x w5_t = swap_workaround (w1_t2[1]); - u32x w6_t = swap_workaround (w1_t2[2]); - u32x w7_t = swap_workaround (w1_t2[3]); - u32x w8_t = swap_workaround (w2_t2[0]); - u32x 
w9_t = swap_workaround (w2_t2[1]); - u32x wa_t = swap_workaround (w2_t2[2]); - u32x wb_t = swap_workaround (w2_t2[3]); - u32x wc_t = swap_workaround (w3_t2[0]); - u32x wd_t = swap_workaround (w3_t2[1]); - u32x we_t = 0; - u32x wf_t = pw_salt_len * 8; - - u32x a = SHA256M_A; - u32x b = SHA256M_B; - u32x c = SHA256M_C; - u32x d = SHA256M_D; - u32x e = SHA256M_E; - u32x f = SHA256M_F; - u32x g = SHA256M_G; - u32x h = SHA256M_H; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = 
SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, 
w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S 
- } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, 
const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, 
const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01440_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01440s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01450_a0.cu b/nv/m01450_a0.cu deleted file mode 100644 index 52bea27..0000000 --- a/nv/m01450_a0.cu +++ /dev/null @@ -1,589 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, 
w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - 
digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - 
sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 
0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01450_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } 
-} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01450_a1.cu b/nv/m01450_a1.cu deleted file mode 100644 index 6c79920..0000000 --- a/nv/m01450_a1.cu +++ /dev/null @@ -1,704 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define 
VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x 
wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = 
w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 
0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - 
const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { 
- digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - 
w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x a = digest[0]; - const u32x b = digest[1]; - const u32x c = digest[2]; - const u32x d = digest[3]; - const u32x e = digest[4]; - const u32x f = digest[5]; - const u32x g = digest[6]; - const u32x h = digest[7]; - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01450_a3.cu b/nv/m01450_a3.cu deleted file mode 100644 index 4db52e1..0000000 --- a/nv/m01450_a3.cu +++ /dev/null @@ -1,759 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - 
SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t 
= SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = 
w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] 
= 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -__device__ static void m01450m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = 
w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01450s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * 
modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - 
w3_t[2] = 0; - w3_t[3] = (64 + salt_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, 
const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01450_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const 
u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01450s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01460_a0.cu b/nv/m01460_a0.cu deleted file mode 100644 index 7a06bb2..0000000 --- a/nv/m01460_a0.cu +++ /dev/null @@ -1,589 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = 
SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, 
a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x 
w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - 
pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); 
- w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + out_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 
7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (64 + out_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - 
- const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01460_a1.cu b/nv/m01460_a1.cu deleted file mode 100644 index 042754b..0000000 --- a/nv/m01460_a1.cu +++ /dev/null @@ -1,695 +0,0 @@ -/** - * Author......: Jens Steube - 
* License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = 
w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, 
d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - 
w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; 
- u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround 
(w3[1]); - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m01460_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - 
salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x 
w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01460_a3.cu b/nv/m01460_a3.cu deleted file mode 100644 index 29b5263..0000000 --- a/nv/m01460_a3.cu +++ /dev/null @@ -1,755 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, 
SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, 
wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] 
= digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -__device__ static void m01460m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = 
swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + pw_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m01460s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (64 
+ pw_len) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, 
digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01460_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01460s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01500_a0.cu b/nv/m01500_a0.cu deleted file mode 100644 index 2b230e7..0000000 --- a/nv/m01500_a0.cu +++ /dev/null @@ -1,766 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - 
-#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x00820200, 0x00020000, 0x80800000, 0x80820200, - 0x00800000, 0x80020200, 0x80020000, 0x80800000, - 0x80020200, 0x00820200, 0x00820000, 0x80000200, - 0x80800200, 0x00800000, 0x00000000, 0x80020000, - 0x00020000, 0x80000000, 0x00800200, 0x00020200, - 0x80820200, 0x00820000, 0x80000200, 0x00800200, - 0x80000000, 0x00000200, 0x00020200, 0x80820000, - 0x00000200, 0x80800200, 0x80820000, 0x00000000, - 0x00000000, 0x80820200, 0x00800200, 0x80020000, - 0x00820200, 0x00020000, 0x80000200, 0x00800200, - 0x80820000, 0x00000200, 0x00020200, 0x80800000, - 0x80020200, 0x80000000, 0x80800000, 0x00820000, - 0x80820200, 0x00020200, 0x00820000, 0x80800200, - 0x00800000, 0x80000200, 0x80020000, 0x00000000, - 0x00020000, 0x00800000, 0x80800200, 0x00820200, - 0x80000000, 0x80820000, 0x00000200, 0x80020200, - /* nibble 1 */ - 0x10042004, 0x00000000, 0x00042000, 0x10040000, - 0x10000004, 0x00002004, 0x10002000, 0x00042000, - 0x00002000, 0x10040004, 0x00000004, 0x10002000, - 0x00040004, 0x10042000, 0x10040000, 0x00000004, - 0x00040000, 0x10002004, 0x10040004, 0x00002000, - 0x00042004, 0x10000000, 0x00000000, 0x00040004, - 0x10002004, 0x00042004, 0x10042000, 0x10000004, - 0x10000000, 0x00040000, 0x00002004, 0x10042004, - 0x00040004, 0x10042000, 0x10002000, 0x00042004, - 0x10042004, 0x00040004, 0x10000004, 
0x00000000, - 0x10000000, 0x00002004, 0x00040000, 0x10040004, - 0x00002000, 0x10000000, 0x00042004, 0x10002004, - 0x10042000, 0x00002000, 0x00000000, 0x10000004, - 0x00000004, 0x10042004, 0x00042000, 0x10040000, - 0x10040004, 0x00040000, 0x00002004, 0x10002000, - 0x10002004, 0x00000004, 0x10040000, 0x00042000, - /* nibble 2 */ - 0x41000000, 0x01010040, 0x00000040, 0x41000040, - 0x40010000, 0x01000000, 0x41000040, 0x00010040, - 0x01000040, 0x00010000, 0x01010000, 0x40000000, - 0x41010040, 0x40000040, 0x40000000, 0x41010000, - 0x00000000, 0x40010000, 0x01010040, 0x00000040, - 0x40000040, 0x41010040, 0x00010000, 0x41000000, - 0x41010000, 0x01000040, 0x40010040, 0x01010000, - 0x00010040, 0x00000000, 0x01000000, 0x40010040, - 0x01010040, 0x00000040, 0x40000000, 0x00010000, - 0x40000040, 0x40010000, 0x01010000, 0x41000040, - 0x00000000, 0x01010040, 0x00010040, 0x41010000, - 0x40010000, 0x01000000, 0x41010040, 0x40000000, - 0x40010040, 0x41000000, 0x01000000, 0x41010040, - 0x00010000, 0x01000040, 0x41000040, 0x00010040, - 0x01000040, 0x00000000, 0x41010000, 0x40000040, - 0x41000000, 0x40010040, 0x00000040, 0x01010000, - /* nibble 3 */ - 0x00100402, 0x04000400, 0x00000002, 0x04100402, - 0x00000000, 0x04100000, 0x04000402, 0x00100002, - 0x04100400, 0x04000002, 0x04000000, 0x00000402, - 0x04000002, 0x00100402, 0x00100000, 0x04000000, - 0x04100002, 0x00100400, 0x00000400, 0x00000002, - 0x00100400, 0x04000402, 0x04100000, 0x00000400, - 0x00000402, 0x00000000, 0x00100002, 0x04100400, - 0x04000400, 0x04100002, 0x04100402, 0x00100000, - 0x04100002, 0x00000402, 0x00100000, 0x04000002, - 0x00100400, 0x04000400, 0x00000002, 0x04100000, - 0x04000402, 0x00000000, 0x00000400, 0x00100002, - 0x00000000, 0x04100002, 0x04100400, 0x00000400, - 0x04000000, 0x04100402, 0x00100402, 0x00100000, - 0x04100402, 0x00000002, 0x04000400, 0x00100402, - 0x00100002, 0x00100400, 0x04100000, 0x04000402, - 0x00000402, 0x04000000, 0x04000002, 0x04100400, - /* nibble 4 */ - 0x02000000, 0x00004000, 
0x00000100, 0x02004108, - 0x02004008, 0x02000100, 0x00004108, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x00004100, - 0x02000108, 0x02004008, 0x02004100, 0x00000000, - 0x00004100, 0x02000000, 0x00004008, 0x00000108, - 0x02000100, 0x00004108, 0x00000000, 0x02000008, - 0x00000008, 0x02000108, 0x02004108, 0x00004008, - 0x02004000, 0x00000100, 0x00000108, 0x02004100, - 0x02004100, 0x02000108, 0x00004008, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x02000100, - 0x02000000, 0x00004100, 0x02004108, 0x00000000, - 0x00004108, 0x02000000, 0x00000100, 0x00004008, - 0x02000108, 0x00000100, 0x00000000, 0x02004108, - 0x02004008, 0x02004100, 0x00000108, 0x00004000, - 0x00004100, 0x02004008, 0x02000100, 0x00000108, - 0x00000008, 0x00004108, 0x02004000, 0x02000008, - /* nibble 5 */ - 0x20000010, 0x00080010, 0x00000000, 0x20080800, - 0x00080010, 0x00000800, 0x20000810, 0x00080000, - 0x00000810, 0x20080810, 0x00080800, 0x20000000, - 0x20000800, 0x20000010, 0x20080000, 0x00080810, - 0x00080000, 0x20000810, 0x20080010, 0x00000000, - 0x00000800, 0x00000010, 0x20080800, 0x20080010, - 0x20080810, 0x20080000, 0x20000000, 0x00000810, - 0x00000010, 0x00080800, 0x00080810, 0x20000800, - 0x00000810, 0x20000000, 0x20000800, 0x00080810, - 0x20080800, 0x00080010, 0x00000000, 0x20000800, - 0x20000000, 0x00000800, 0x20080010, 0x00080000, - 0x00080010, 0x20080810, 0x00080800, 0x00000010, - 0x20080810, 0x00080800, 0x00080000, 0x20000810, - 0x20000010, 0x20080000, 0x00080810, 0x00000000, - 0x00000800, 0x20000010, 0x20000810, 0x20080800, - 0x20080000, 0x00000810, 0x00000010, 0x20080010, - /* nibble 6 */ - 0x00001000, 0x00000080, 0x00400080, 0x00400001, - 0x00401081, 0x00001001, 0x00001080, 0x00000000, - 0x00400000, 0x00400081, 0x00000081, 0x00401000, - 0x00000001, 0x00401080, 0x00401000, 0x00000081, - 0x00400081, 0x00001000, 0x00001001, 0x00401081, - 0x00000000, 0x00400080, 0x00400001, 0x00001080, - 0x00401001, 0x00001081, 0x00401080, 0x00000001, - 0x00001081, 0x00401001, 0x00000080, 
0x00400000, - 0x00001081, 0x00401000, 0x00401001, 0x00000081, - 0x00001000, 0x00000080, 0x00400000, 0x00401001, - 0x00400081, 0x00001081, 0x00001080, 0x00000000, - 0x00000080, 0x00400001, 0x00000001, 0x00400080, - 0x00000000, 0x00400081, 0x00400080, 0x00001080, - 0x00000081, 0x00001000, 0x00401081, 0x00400000, - 0x00401080, 0x00000001, 0x00001001, 0x00401081, - 0x00400001, 0x00401080, 0x00401000, 0x00001001, - /* nibble 7 */ - 0x08200020, 0x08208000, 0x00008020, 0x00000000, - 0x08008000, 0x00200020, 0x08200000, 0x08208020, - 0x00000020, 0x08000000, 0x00208000, 0x00008020, - 0x00208020, 0x08008020, 0x08000020, 0x08200000, - 0x00008000, 0x00208020, 0x00200020, 0x08008000, - 0x08208020, 0x08000020, 0x00000000, 0x00208000, - 0x08000000, 0x00200000, 0x08008020, 0x08200020, - 0x00200000, 0x00008000, 0x08208000, 0x00000020, - 0x00200000, 0x00008000, 0x08000020, 0x08208020, - 0x00008020, 0x08000000, 0x00000000, 0x00208000, - 0x08200020, 0x08008020, 0x08008000, 0x00200020, - 0x08208000, 0x00000020, 0x00200020, 0x08008000, - 0x08208020, 0x00200000, 0x08200000, 0x08000020, - 0x00208000, 0x00008020, 0x08008020, 0x08200000, - 0x00000020, 0x08208000, 0x00208020, 0x00000000, - 0x08000000, 0x08200020, 0x00008000, 0x00208020 -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 
0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 
0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 
0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 
0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll - for (u32 i = 0; i < 16; i++) - { - if ((i < 2) || (i == 8) || (i == 15)) - { - c = ((c >> 1) | (c << 27)); - d = ((d >> 1) | (d << 27)); - } - else - { - c = ((c >> 2) | (c << 26)); - d = ((d >> 2) | (d << 26)); - } - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; - - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); - - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; - - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); - - #if 
__CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - } -} - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - const u32 E1 = (mask >> 2) & 0x3f0; - - const u32 E0 = mask & 0x3f; - - u32x r = 0; - u32x l = 0; - - for (u32 i = 0; i < 25; i++) - { - #pragma unroll - for (u32 j = 0; j < 16; j += 2) - { - u32x t; - u32x u; - - t = r ^ (r >> 16); - u = t & E0; - t = t & E1; - u = u ^ (u << 16); - u = u ^ r; - u = u ^ Kc[j + 0]; - t = t ^ (t << 16); - t = t ^ r; - t = rotl32 (t, 28u); - t = t ^ Kd[j + 0]; - - l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); - - t = l ^ (l >> 16); - u = t & E0; - t = t & E1; - u = u ^ (u << 16); - u = u ^ l; - u = u ^ Kc[j + 1]; - t = t ^ (t << 16); - t = t ^ l; - t = rotl32 (t, 28u); - t = t ^ Kd[j + 1]; - - r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); - } - - u32x tt; - - tt = l; - l = r; - r = tt; - } - - iv[0] = rotl32 (r, 31); - iv[1] = rotl32 (l, 31); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = 0; - pw_buf0[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_skb[8][64]; - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - 
u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 8) ? 8 : out_len; - - u32x data[2]; - - data[0] = (w0[0] << 1) & 0xfefefefe; - data[1] = (w0[1] << 1) & 0xfefefefe; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - - u32x iv[2]; - - _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = 0; - pw_buf0[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_skb[8][64]; - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; 
- s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 8) ? 8 : out_len; - - u32x data[2]; - - data[0] = (w0[0] << 1) & 0xfefefefe; - data[1] = (w0[1] << 1) & 0xfefefefe; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - - u32x iv[2]; - - _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01500_a1.cu b/nv/m01500_a1.cu deleted file mode 100644 index 5306099..0000000 --- a/nv/m01500_a1.cu +++ /dev/null @@ -1,886 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ 
__constant__ comb_t c_combs[1024]; - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x00820200, 0x00020000, 0x80800000, 0x80820200, - 0x00800000, 0x80020200, 0x80020000, 0x80800000, - 0x80020200, 0x00820200, 0x00820000, 0x80000200, - 0x80800200, 0x00800000, 0x00000000, 0x80020000, - 0x00020000, 0x80000000, 0x00800200, 0x00020200, - 0x80820200, 0x00820000, 0x80000200, 0x00800200, - 0x80000000, 0x00000200, 0x00020200, 0x80820000, - 0x00000200, 0x80800200, 0x80820000, 0x00000000, - 0x00000000, 0x80820200, 0x00800200, 0x80020000, - 0x00820200, 0x00020000, 0x80000200, 0x00800200, - 0x80820000, 0x00000200, 0x00020200, 0x80800000, - 0x80020200, 0x80000000, 0x80800000, 0x00820000, - 0x80820200, 0x00020200, 0x00820000, 0x80800200, - 0x00800000, 0x80000200, 0x80020000, 0x00000000, - 0x00020000, 0x00800000, 0x80800200, 0x00820200, - 0x80000000, 0x80820000, 0x00000200, 0x80020200, - /* nibble 1 */ - 0x10042004, 0x00000000, 0x00042000, 0x10040000, - 0x10000004, 0x00002004, 0x10002000, 0x00042000, - 0x00002000, 0x10040004, 0x00000004, 0x10002000, - 0x00040004, 0x10042000, 0x10040000, 0x00000004, - 0x00040000, 0x10002004, 0x10040004, 0x00002000, - 0x00042004, 0x10000000, 0x00000000, 0x00040004, - 0x10002004, 0x00042004, 0x10042000, 0x10000004, - 0x10000000, 0x00040000, 0x00002004, 0x10042004, - 0x00040004, 0x10042000, 0x10002000, 0x00042004, - 0x10042004, 0x00040004, 0x10000004, 0x00000000, - 0x10000000, 0x00002004, 0x00040000, 0x10040004, - 0x00002000, 0x10000000, 0x00042004, 0x10002004, - 0x10042000, 0x00002000, 0x00000000, 0x10000004, - 0x00000004, 0x10042004, 0x00042000, 0x10040000, - 0x10040004, 0x00040000, 0x00002004, 0x10002000, - 0x10002004, 0x00000004, 
0x10040000, 0x00042000, - /* nibble 2 */ - 0x41000000, 0x01010040, 0x00000040, 0x41000040, - 0x40010000, 0x01000000, 0x41000040, 0x00010040, - 0x01000040, 0x00010000, 0x01010000, 0x40000000, - 0x41010040, 0x40000040, 0x40000000, 0x41010000, - 0x00000000, 0x40010000, 0x01010040, 0x00000040, - 0x40000040, 0x41010040, 0x00010000, 0x41000000, - 0x41010000, 0x01000040, 0x40010040, 0x01010000, - 0x00010040, 0x00000000, 0x01000000, 0x40010040, - 0x01010040, 0x00000040, 0x40000000, 0x00010000, - 0x40000040, 0x40010000, 0x01010000, 0x41000040, - 0x00000000, 0x01010040, 0x00010040, 0x41010000, - 0x40010000, 0x01000000, 0x41010040, 0x40000000, - 0x40010040, 0x41000000, 0x01000000, 0x41010040, - 0x00010000, 0x01000040, 0x41000040, 0x00010040, - 0x01000040, 0x00000000, 0x41010000, 0x40000040, - 0x41000000, 0x40010040, 0x00000040, 0x01010000, - /* nibble 3 */ - 0x00100402, 0x04000400, 0x00000002, 0x04100402, - 0x00000000, 0x04100000, 0x04000402, 0x00100002, - 0x04100400, 0x04000002, 0x04000000, 0x00000402, - 0x04000002, 0x00100402, 0x00100000, 0x04000000, - 0x04100002, 0x00100400, 0x00000400, 0x00000002, - 0x00100400, 0x04000402, 0x04100000, 0x00000400, - 0x00000402, 0x00000000, 0x00100002, 0x04100400, - 0x04000400, 0x04100002, 0x04100402, 0x00100000, - 0x04100002, 0x00000402, 0x00100000, 0x04000002, - 0x00100400, 0x04000400, 0x00000002, 0x04100000, - 0x04000402, 0x00000000, 0x00000400, 0x00100002, - 0x00000000, 0x04100002, 0x04100400, 0x00000400, - 0x04000000, 0x04100402, 0x00100402, 0x00100000, - 0x04100402, 0x00000002, 0x04000400, 0x00100402, - 0x00100002, 0x00100400, 0x04100000, 0x04000402, - 0x00000402, 0x04000000, 0x04000002, 0x04100400, - /* nibble 4 */ - 0x02000000, 0x00004000, 0x00000100, 0x02004108, - 0x02004008, 0x02000100, 0x00004108, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x00004100, - 0x02000108, 0x02004008, 0x02004100, 0x00000000, - 0x00004100, 0x02000000, 0x00004008, 0x00000108, - 0x02000100, 0x00004108, 0x00000000, 0x02000008, - 0x00000008, 
0x02000108, 0x02004108, 0x00004008, - 0x02004000, 0x00000100, 0x00000108, 0x02004100, - 0x02004100, 0x02000108, 0x00004008, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x02000100, - 0x02000000, 0x00004100, 0x02004108, 0x00000000, - 0x00004108, 0x02000000, 0x00000100, 0x00004008, - 0x02000108, 0x00000100, 0x00000000, 0x02004108, - 0x02004008, 0x02004100, 0x00000108, 0x00004000, - 0x00004100, 0x02004008, 0x02000100, 0x00000108, - 0x00000008, 0x00004108, 0x02004000, 0x02000008, - /* nibble 5 */ - 0x20000010, 0x00080010, 0x00000000, 0x20080800, - 0x00080010, 0x00000800, 0x20000810, 0x00080000, - 0x00000810, 0x20080810, 0x00080800, 0x20000000, - 0x20000800, 0x20000010, 0x20080000, 0x00080810, - 0x00080000, 0x20000810, 0x20080010, 0x00000000, - 0x00000800, 0x00000010, 0x20080800, 0x20080010, - 0x20080810, 0x20080000, 0x20000000, 0x00000810, - 0x00000010, 0x00080800, 0x00080810, 0x20000800, - 0x00000810, 0x20000000, 0x20000800, 0x00080810, - 0x20080800, 0x00080010, 0x00000000, 0x20000800, - 0x20000000, 0x00000800, 0x20080010, 0x00080000, - 0x00080010, 0x20080810, 0x00080800, 0x00000010, - 0x20080810, 0x00080800, 0x00080000, 0x20000810, - 0x20000010, 0x20080000, 0x00080810, 0x00000000, - 0x00000800, 0x20000010, 0x20000810, 0x20080800, - 0x20080000, 0x00000810, 0x00000010, 0x20080010, - /* nibble 6 */ - 0x00001000, 0x00000080, 0x00400080, 0x00400001, - 0x00401081, 0x00001001, 0x00001080, 0x00000000, - 0x00400000, 0x00400081, 0x00000081, 0x00401000, - 0x00000001, 0x00401080, 0x00401000, 0x00000081, - 0x00400081, 0x00001000, 0x00001001, 0x00401081, - 0x00000000, 0x00400080, 0x00400001, 0x00001080, - 0x00401001, 0x00001081, 0x00401080, 0x00000001, - 0x00001081, 0x00401001, 0x00000080, 0x00400000, - 0x00001081, 0x00401000, 0x00401001, 0x00000081, - 0x00001000, 0x00000080, 0x00400000, 0x00401001, - 0x00400081, 0x00001081, 0x00001080, 0x00000000, - 0x00000080, 0x00400001, 0x00000001, 0x00400080, - 0x00000000, 0x00400081, 0x00400080, 0x00001080, - 0x00000081, 0x00001000, 
0x00401081, 0x00400000, - 0x00401080, 0x00000001, 0x00001001, 0x00401081, - 0x00400001, 0x00401080, 0x00401000, 0x00001001, - /* nibble 7 */ - 0x08200020, 0x08208000, 0x00008020, 0x00000000, - 0x08008000, 0x00200020, 0x08200000, 0x08208020, - 0x00000020, 0x08000000, 0x00208000, 0x00008020, - 0x00208020, 0x08008020, 0x08000020, 0x08200000, - 0x00008000, 0x00208020, 0x00200020, 0x08008000, - 0x08208020, 0x08000020, 0x00000000, 0x00208000, - 0x08000000, 0x00200000, 0x08008020, 0x08200020, - 0x00200000, 0x00008000, 0x08208000, 0x00000020, - 0x00200000, 0x00008000, 0x08000020, 0x08208020, - 0x00008020, 0x08000000, 0x00000000, 0x00208000, - 0x08200020, 0x08008020, 0x08008000, 0x00200020, - 0x08208000, 0x00000020, 0x00200020, 0x08008000, - 0x08208020, 0x00200000, 0x08200000, 0x08000020, - 0x00208000, 0x00008020, 0x08008020, 0x08200000, - 0x00000020, 0x08208000, 0x00208020, 0x00000000, - 0x08000000, 0x08200020, 0x00008000, 0x00208020 -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 
0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 
0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 
0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 
-#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll - for (u32 i = 0; i < 16; i++) - { - if ((i < 2) || (i == 8) || (i == 15)) - { - c = ((c >> 1) | (c << 27)); - d = ((d >> 1) | (d << 27)); - } - else - { - c = ((c >> 2) | (c << 26)); - d = ((d >> 2) | (d << 26)); - } - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; - - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); - - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; - - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - } -} - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32x Kc[16], u32x 
Kd[16], u32 s_SPtrans[8][64]) -{ - const u32 E1 = (mask >> 2) & 0x3f0; - - const u32 E0 = mask & 0x3f; - - u32x r = 0; - u32x l = 0; - - for (u32 i = 0; i < 25; i++) - { - #pragma unroll - for (u32 j = 0; j < 16; j += 2) - { - u32x t; - u32x u; - - t = r ^ (r >> 16); - u = t & E0; - t = t & E1; - u = u ^ (u << 16); - u = u ^ r; - u = u ^ Kc[j + 0]; - t = t ^ (t << 16); - t = t ^ r; - t = rotl32 (t, 28u); - t = t ^ Kd[j + 0]; - - l ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); - - t = l ^ (l >> 16); - u = t & E0; - t = t & E1; - u = u ^ (u << 16); - u = u ^ l; - u = u ^ Kc[j + 1]; - t = t ^ (t << 16); - t = t ^ l; - t = rotl32 (t, 28u); - t = t ^ Kd[j + 1]; - - r ^= BOX (((u >> 0) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 8) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 16) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 24) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 0) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 8) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 16) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 24) & 0x3f), 7, s_SPtrans); - } - - u32x tt; - - tt = l; - l = r; - r = tt; - } - - iv[0] = rotl32 (r, 31); - iv[1] = rotl32 (l, 31); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_skb[8][64]; - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 8) ? 
8 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x data[2]; - - data[0] = (w0[0] << 1) & 0xfefefefe; - data[1] = (w0[1] << 1) & 0xfefefefe; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - - u32x iv[2]; - - _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x 
wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_skb[8][64]; - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 8) ? 
8 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x data[2]; - - data[0] = (w0[0] << 1) & 0xfefefefe; - data[1] = (w0[1] << 1) & 0xfefefefe; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (data[0], data[1], Kc, Kd, s_skb); - - u32x iv[2]; - - _des_crypt_encrypt (iv, mask, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01500_a3.cu b/nv/m01500_a3.cu deleted file mode 100644 index 86fe186..0000000 --- a/nv/m01500_a3.cu +++ /dev/null @@ -1,2051 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below - * : sboxes for others were takes fron JtR, license below - */ - -#define _DES_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp_bs.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect4_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c" -#endif - -#define KXX_DECL -#define sXXX_DECL - -#define myselx(a,b,c) ((c) ? (b) : (a)) - -__device__ __constant__ u32x c_bfs[1024]; -__device__ __constant__ bs_word_t c_tm[32]; -__device__ __shared__ u32 s_S[64]; - -#if __CUDA_ARCH__ >= 500 - -// -// Bitslice DES S-boxes with LOP3.LUT instructions -// For NVIDIA Maxwell architecture and CUDA 7.5 RC -// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19 -// -// Gate counts: 25 24 25 18 25 24 24 23 -// Average: 23.5 -// Depth: 8 7 7 6 8 10 10 8 -// Average: 8 -// -// Note that same S-box function with a lower gate count isn't necessarily faster. -// -// These Boolean expressions corresponding to DES S-boxes were -// discovered by -// -// This file itself is Copyright (c) 2015 by -// Redistribution and use in source and binary forms, with or without -// modification, are permitted. -// -// The underlying mathematical formulas are NOT copyrighted. 
-// - -#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d)); - -__device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) - LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) - LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) - LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) - LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) - LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) - LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) - LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) - LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) - LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) - LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) - LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) - LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) - LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) - LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) - LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) - LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) - LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) - LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) - LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) - LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) - LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) - LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) - LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) - LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - 
*out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) - LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) - LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) - LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) - LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) - LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) - LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) - LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) - LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) - LUT(x3333CCCC00000000, a2, a5, a6, 0x14) - LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) - LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) - LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) - LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) - LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) - LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) - LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) - LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) - LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) - LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) - LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) - LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) - LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) - LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) - LUT(xF0F00F0FF0F0F0F0, 
a3, a5, a6, 0x4B) - LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) - LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) - LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) - LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) - LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) - LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) - LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) - LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) - LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) - LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) - LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) - LUT(x500F500F500F500F, a1, a3, a4, 0x98) - LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) - LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) - LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) - LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) - LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) - LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) - LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) - LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) - LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) - LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) - LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x55F055F055F055F0, a1, a3, a4, 0x72) - LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) - LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) - LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) - LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 
0xC6) - LUT(x9999666699996666, a1, a2, a5, 0x69) - LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) - LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) - LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) - LUT(x4848484848484848, a1, a2, a3, 0x12) - LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) - LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) - LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) - LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) - LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) - LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) - LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) - LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) - LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) - LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) - LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) - LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) - LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) - LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) - LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) - LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) - LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) - LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) - LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) - LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) - LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) - LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 
0x97) - LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) - LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) - LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) - LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) - LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) - LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) - LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) - LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) - LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) - LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) - LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) - LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) - LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) - LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) - LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) - LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) - LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) - LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) - LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) - LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) - LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) - LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) - LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) - LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) - LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) - 
LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) - LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) - LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) - LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) - LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) - LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) - LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) - LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) - LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) - LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) - LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) - LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) - LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) - LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) - LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) - LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) - LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) - LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) - LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) - LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) - LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) - LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) - LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) - LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) - LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) - LUT(xA050A050A050A050, a1, a3, a4, 0x21) - LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, 
x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) - LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) - LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) - LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) - LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) - LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) - LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) - LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) - LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) - LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) - LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) - LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) - LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) - LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) - LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) - LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) - LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) - LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) - LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) - LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) - LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) - LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) - LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) - LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) - LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) - LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, 
x9955EE559955EE55, 0x9C) - LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -#else - -/* - * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC - * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. - * - * Gate counts: 49 44 46 33 48 46 46 41 - * Average: 44.125 - * - * Several same-gate-count expressions for each S-box are included (for use on - * different CPUs/GPUs). - * - * These Boolean expressions corresponding to DES S-boxes have been generated - * by Roman Rusakov for use in Openwall's - * John the Ripper password cracker: http://www.openwall.com/john/ - * Being mathematical formulas, they are not copyrighted and are free for reuse - * by anyone. - * - * This file (a specific representation of the S-box expressions, surrounding - * logic) is Copyright (c) 2011 by Solar Designer . - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. (This is a heavily cut-down "BSD license".) 
- * - * The effort has been sponsored by Rapid7: http://www.rapid7.com - */ - -__device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, - x25202160; - u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; - u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; - u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; - u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; - u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; - u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; - u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x55005500 = a1 & ~a5; - x5A0F5A0F = a4 ^ x55005500; - x3333FFFF = a3 | a6; - x66666666 = a1 ^ a3; - x22226666 = x3333FFFF & x66666666; - x2D2D6969 = a4 ^ x22226666; - x25202160 = x2D2D6969 & ~x5A0F5A0F; - - x00FFFF00 = a5 ^ a6; - x33CCCC33 = a3 ^ x00FFFF00; - x4803120C = x5A0F5A0F & ~x33CCCC33; - x2222FFFF = a6 | x22226666; - x6A21EDF3 = x4803120C ^ x2222FFFF; - x4A01CC93 = x6A21EDF3 & ~x25202160; - - x5555FFFF = a1 | a6; - x7F75FFFF = x6A21EDF3 | x5555FFFF; - x00D20096 = a5 & ~x2D2D6969; - x7FA7FF69 = x7F75FFFF ^ x00D20096; - - x0A0A0000 = a4 & ~x5555FFFF; - x0AD80096 = x00D20096 ^ x0A0A0000; - x00999900 = x00FFFF00 & ~x66666666; - x0AD99996 = x0AD80096 | x00999900; - - x22332233 = a3 & ~x55005500; - x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; - x054885C0 = x257AA5F0 & ~x22332233; - xFAB77A3F = ~x054885C0; - x2221EDF3 = x3333FFFF & x6A21EDF3; - xD89697CC = xFAB77A3F ^ x2221EDF3; - x20 = x7FA7FF69 & ~a2; - x21 = x20 ^ xD89697CC; - *out3 ^= x21; - - x05B77AC0 = x00FFFF00 ^ x054885C0; - x05F77AD6 = x00D20096 | x05B77AC0; - x36C48529 = x3333FFFF ^ x05F77AD6; - x6391D07C = a1 ^ x36C48529; - xBB0747B0 = xD89697CC ^ x6391D07C; - x00 = x25202160 | a2; - x01 = x00 ^ xBB0747B0; - *out1 ^= x01; - - x4C460000 = 
x3333FFFF ^ x7F75FFFF; - x4EDF9996 = x0AD99996 | x4C460000; - x2D4E49EA = x6391D07C ^ x4EDF9996; - xBBFFFFB0 = x00FFFF00 | xBB0747B0; - x96B1B65A = x2D4E49EA ^ xBBFFFFB0; - x10 = x4A01CC93 | a2; - x11 = x10 ^ x96B1B65A; - *out2 ^= x11; - - x5AFF5AFF = a5 | x5A0F5A0F; - x52B11215 = x5AFF5AFF & ~x2D4E49EA; - x4201C010 = x4A01CC93 & x6391D07C; - x10B0D205 = x52B11215 ^ x4201C010; - x30 = x10B0D205 | a2; - x31 = x30 ^ x0AD99996; - *out4 ^= x31; -} - -__device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x33CC33CC; - u32 x55550000, x00AA00FF, x33BB33FF; - u32 x33CC0000, x11441144, x11BB11BB, x003311BB; - u32 x00000F0F, x336600FF, x332200FF, x332200F0; - u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; - u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; - u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; - u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; - u32 x0A451047, xBBDFDD7B, xB19ACD3C; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x33CC33CC = a2 ^ a5; - - x55550000 = a1 & ~a6; - x00AA00FF = a5 & ~x55550000; - x33BB33FF = a2 | x00AA00FF; - - x33CC0000 = x33CC33CC & ~a6; - x11441144 = a1 & x33CC33CC; - x11BB11BB = a5 ^ x11441144; - x003311BB = x11BB11BB & ~x33CC0000; - - x00000F0F = a3 & a6; - x336600FF = x00AA00FF ^ x33CC0000; - x332200FF = x33BB33FF & x336600FF; - x332200F0 = x332200FF & ~x00000F0F; - - x0302000F = a3 & x332200FF; - xAAAAAAAA = ~a1; - xA9A8AAA5 = x0302000F ^ xAAAAAAAA; - x33CCCC33 = a6 ^ x33CC33CC; - x33CCC030 = x33CCCC33 & ~x00000F0F; - x9A646A95 = xA9A8AAA5 ^ x33CCC030; - x10 = a4 & ~x332200F0; - x11 = x10 ^ x9A646A95; - *out2 ^= x11; - - x00333303 = a2 & ~x33CCC030; - x118822B8 = x11BB11BB ^ x00333303; - xA8208805 = xA9A8AAA5 & ~x118822B8; - x3CC3C33C = a3 ^ x33CCCC33; - x94E34B39 = xA8208805 ^ x3CC3C33C; - x00 = x33BB33FF & ~a4; - x01 = x00 ^ x94E34B39; - *out1 ^= x01; - - x0331330C 
= x0302000F ^ x00333303; - x3FF3F33C = x3CC3C33C | x0331330C; - xA9DF596A = x33BB33FF ^ x9A646A95; - xA9DF5F6F = x00000F0F | xA9DF596A; - x962CAC53 = x3FF3F33C ^ xA9DF5F6F; - - xA9466A6A = x332200FF ^ x9A646A95; - x3DA52153 = x94E34B39 ^ xA9466A6A; - x29850143 = xA9DF5F6F & x3DA52153; - x33C0330C = x33CC33CC & x3FF3F33C; - x1A45324F = x29850143 ^ x33C0330C; - x20 = x1A45324F | a4; - x21 = x20 ^ x962CAC53; - *out3 ^= x21; - - x0A451047 = x1A45324F & ~x118822B8; - xBBDFDD7B = x33CCCC33 | xA9DF596A; - xB19ACD3C = x0A451047 ^ xBBDFDD7B; - x30 = x003311BB | a4; - x31 = x30 ^ xB19ACD3C; - *out4 ^= x31; -} - -__device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; - u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; - u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; - u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; - u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; - u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; - u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; - u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x44444444 = a1 & ~a2; - x0F0FF0F0 = a3 ^ a6; - x4F4FF4F4 = x44444444 | x0F0FF0F0; - x00FFFF00 = a4 ^ a6; - x00AAAA00 = x00FFFF00 & ~a1; - x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; - - x3C3CC3C3 = a2 ^ x0F0FF0F0; - x3C3C0000 = x3C3CC3C3 & ~a6; - x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; - x0C840A00 = x4FE55EF4 & ~x7373F4F4; - - x00005EF4 = a6 & x4FE55EF4; - x00FF5EFF = a4 | x00005EF4; - x00555455 = a1 & x00FF5EFF; - x3C699796 = x3C3CC3C3 ^ x00555455; - x30 = x4FE55EF4 & ~a5; - x31 = x30 ^ x3C699796; - *out4 ^= x31; - - x000FF000 = x0F0FF0F0 & x00FFFF00; - x55AA55AA = a1 ^ a4; - x26D9A15E = x7373F4F4 ^ x55AA55AA; - x2FDFAF5F = a3 | x26D9A15E; - x2FD00F5F = x2FDFAF5F & ~x000FF000; - - x55AAFFAA = x00AAAA00 | x55AA55AA; - 
x28410014 = x3C699796 & ~x55AAFFAA; - x000000FF = a4 & a6; - x000000CC = x000000FF & ~a2; - x284100D8 = x28410014 ^ x000000CC; - - x204100D0 = x7373F4F4 & x284100D8; - x3C3CC3FF = x3C3CC3C3 | x000000FF; - x1C3CC32F = x3C3CC3FF & ~x204100D0; - x4969967A = a1 ^ x1C3CC32F; - x10 = x2FD00F5F & a5; - x11 = x10 ^ x4969967A; - *out2 ^= x11; - - x4CC44CC4 = x4FE55EF4 & ~a2; - x40C040C0 = x4CC44CC4 & ~a3; - xC3C33C3C = ~x3C3CC3C3; - x9669C396 = x55AAFFAA ^ xC3C33C3C; - xD6A98356 = x40C040C0 ^ x9669C396; - x00 = a5 & ~x0C840A00; - x01 = x00 ^ xD6A98356; - *out1 ^= x01; - - xD6E9C3D6 = x40C040C0 | x9669C396; - x4CEEEEC4 = x00AAAA00 | x4CC44CC4; - x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; - x001A000B = a4 & ~x4FE55EF4; - x9A1F2D1B = x9A072D12 | x001A000B; - x20 = a5 & ~x284100D8; - x21 = x20 ^ x9A1F2D1B; - *out3 ^= x21; -} - -__device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x5A5A5A5A, x0F0FF0F0; - u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, - x52FBCA0F, x61C8F93C; - u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; - u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; - u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x5A5A5A5A = a1 ^ a3; - x0F0FF0F0 = a3 ^ a5; - x33FF33FF = a2 | a4; - x33FFCC00 = a5 ^ x33FF33FF; - x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; - x0C0CC0C0 = x0F0FF0F0 & ~a2; - x0CF3C03F = a4 ^ x0C0CC0C0; - x5EFBDA7F = x5A5A5A5A | x0CF3C03F; - x52FBCA0F = x5EFBDA7F & ~x0C0030F0; - x61C8F93C = a2 ^ x52FBCA0F; - - x00C0C03C = x0CF3C03F & x61C8F93C; - x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; - x3B92A366 = x5A5A5A5A ^ x61C8F93C; - x30908326 = x3B92A366 & ~x0F0F30C0; - x3C90B3D6 = x0C0030F0 ^ x30908326; - - x33CC33CC = a2 ^ a4; - x0C0CFFFF = a5 | x0C0CC0C0; - x379E5C99 = x3B92A366 ^ x0C0CFFFF; - x04124C11 = x379E5C99 & ~x33CC33CC; - x56E9861E = x52FBCA0F ^ x04124C11; - x00 = 
a6 & ~x3C90B3D6; - x01 = x00 ^ x56E9861E; - *out1 ^= x01; - - xA91679E1 = ~x56E9861E; - x10 = x3C90B3D6 & ~a6; - x11 = x10 ^ xA91679E1; - *out2 ^= x11; - - x9586CA37 = x3C90B3D6 ^ xA91679E1; - x8402C833 = x9586CA37 & ~x33CC33CC; - x84C2C83F = x00C0C03C | x8402C833; - xB35C94A6 = x379E5C99 ^ x84C2C83F; - x20 = x61C8F93C | a6; - x21 = x20 ^ xB35C94A6; - *out3 ^= x21; - - x30 = a6 & x61C8F93C; - x31 = x30 ^ xB35C94A6; - *out4 ^= x31; -} - -__device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; - u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; - u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; - u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; - u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; - u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; - u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; - u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x77777777 = a1 | a3; - x77770000 = x77777777 & ~a6; - x22225555 = a1 ^ x77770000; - x11116666 = a3 ^ x22225555; - x1F1F6F6F = a4 | x11116666; - - x70700000 = x77770000 & ~a4; - x43433333 = a3 ^ x70700000; - x00430033 = a5 & x43433333; - x55557777 = a1 | x11116666; - x55167744 = x00430033 ^ x55557777; - x5A19784B = a4 ^ x55167744; - - x5A1987B4 = a6 ^ x5A19784B; - x7A3BD7F5 = x22225555 | x5A1987B4; - x003B00F5 = a5 & x7A3BD7F5; - x221955A0 = x22225555 ^ x003B00F5; - x05050707 = a4 & x55557777; - x271C52A7 = x221955A0 ^ x05050707; - - x2A2A82A0 = x7A3BD7F5 & ~a1; - x6969B193 = x43433333 ^ x2A2A82A0; - x1FE06F90 = a5 ^ x1F1F6F6F; - x16804E00 = x1FE06F90 & ~x6969B193; - xE97FB1FF = ~x16804E00; - x20 = xE97FB1FF & ~a2; - x21 = x20 ^ x5A19784B; - *out3 ^= x21; - - x43403302 = x43433333 & ~x003B00F5; - x35CAED30 = x2A2A82A0 ^ x1FE06F90; - x37DEFFB7 = 
x271C52A7 | x35CAED30; - x349ECCB5 = x37DEFFB7 & ~x43403302; - x0B01234A = x1F1F6F6F & ~x349ECCB5; - - x101884B4 = x5A1987B4 & x349ECCB5; - x0FF8EB24 = x1FE06F90 ^ x101884B4; - x41413333 = x43433333 & x55557777; - x4FF9FB37 = x0FF8EB24 | x41413333; - x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; - x30 = x4FC2FBC2 & a2; - x31 = x30 ^ x271C52A7; - *out4 ^= x31; - - x22222222 = a1 ^ x77777777; - x16BCEE97 = x349ECCB5 ^ x22222222; - x0F080B04 = a4 & x0FF8EB24; - x19B4E593 = x16BCEE97 ^ x0F080B04; - x00 = x0B01234A | a2; - x01 = x00 ^ x19B4E593; - *out1 ^= x01; - - x5C5C5C5C = x1F1F6F6F ^ x43433333; - x4448184C = x5C5C5C5C & ~x19B4E593; - x2DDABE71 = x22225555 ^ x0FF8EB24; - x6992A63D = x4448184C ^ x2DDABE71; - x10 = x1F1F6F6F & a2; - x11 = x10 ^ x6992A63D; - *out2 ^= x11; -} - -__device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x33CC33CC; - u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; - u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; - u32 x09030C06, x09030000, x336622FF, x3A6522FF; - u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; - u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; - u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; - u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; - u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x33CC33CC = a2 ^ a5; - - x3333FFFF = a2 | a6; - x11115555 = a1 & x3333FFFF; - x22DD6699 = x33CC33CC ^ x11115555; - x22DD9966 = a6 ^ x22DD6699; - x00220099 = a5 & ~x22DD9966; - - x00551144 = a1 & x22DD9966; - x33662277 = a2 ^ x00551144; - x5A5A5A5A = a1 ^ a3; - x7B7E7A7F = x33662277 | x5A5A5A5A; - x59A31CE6 = x22DD6699 ^ x7B7E7A7F; - - x09030C06 = a3 & x59A31CE6; - x09030000 = x09030C06 & ~a6; - x336622FF = x00220099 | x33662277; - x3A6522FF = x09030000 ^ x336622FF; - x30 = x3A6522FF & a4; - x31 = x30 ^ x59A31CE6; - *out4 ^= x31; - - 
x484D494C = a2 ^ x7B7E7A7F; - x0000B6B3 = a6 & ~x484D494C; - x0F0FB9BC = a3 ^ x0000B6B3; - x00FC00F9 = a5 & ~x09030C06; - x0FFFB9FD = x0F0FB9BC | x00FC00F9; - - x5DF75DF7 = a1 | x59A31CE6; - x116600F7 = x336622FF & x5DF75DF7; - x1E69B94B = x0F0FB9BC ^ x116600F7; - x1668B94B = x1E69B94B & ~x09030000; - x20 = x00220099 | a4; - x21 = x20 ^ x1668B94B; - *out3 ^= x21; - - x7B7B7B7B = a2 | x5A5A5A5A; - x411E5984 = x3A6522FF ^ x7B7B7B7B; - x1FFFFDFD = x11115555 | x0FFFB9FD; - x5EE1A479 = x411E5984 ^ x1FFFFDFD; - - x3CB4DFD2 = x22DD6699 ^ x1E69B94B; - x004B002D = a5 & ~x3CB4DFD2; - xB7B2B6B3 = ~x484D494C; - xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; - xCC82CDE5 = x004B002D ^ xCCC9CDC8; - x10 = xCC82CDE5 & ~a4; - x11 = x10 ^ x5EE1A479; - *out2 ^= x11; - - x0055EEBB = a6 ^ x00551144; - x5A5AECE9 = a1 ^ x0F0FB9BC; - x0050ECA9 = x0055EEBB & x5A5AECE9; - xC5CAC1CE = x09030C06 ^ xCCC9CDC8; - xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; - x00 = x0FFFB9FD & ~a4; - x01 = x00 ^ xC59A2D67; - *out1 ^= x01; -} - -__device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; - u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; - u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; - u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; - u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; - u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; - u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; - u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x0FF00FF0 = a4 ^ a5; - x3CC33CC3 = a3 ^ x0FF00FF0; - x00003CC3 = a6 & x3CC33CC3; - x0F000F00 = a4 & x0FF00FF0; - x5A555A55 = a2 ^ x0F000F00; - x00001841 = x00003CC3 & x5A555A55; - - x00000F00 = a6 & x0F000F00; - x33333C33 = a3 ^ x00000F00; - x7B777E77 = x5A555A55 | x33333C33; - x0FF0F00F = a6 ^ x0FF00FF0; - x74878E78 = x7B777E77 ^ 
x0FF0F00F; - x30 = a1 & ~x00001841; - x31 = x30 ^ x74878E78; - *out4 ^= x31; - - x003C003C = a5 & ~x3CC33CC3; - x5A7D5A7D = x5A555A55 | x003C003C; - x333300F0 = x00003CC3 ^ x33333C33; - x694E5A8D = x5A7D5A7D ^ x333300F0; - - x0FF0CCCC = x00003CC3 ^ x0FF0F00F; - x000F0303 = a4 & ~x0FF0CCCC; - x5A505854 = x5A555A55 & ~x000F0303; - x33CC000F = a5 ^ x333300F0; - x699C585B = x5A505854 ^ x33CC000F; - - x7F878F78 = x0F000F00 | x74878E78; - x21101013 = a3 & x699C585B; - x7F979F7B = x7F878F78 | x21101013; - x30030CC0 = x3CC33CC3 & ~x0FF0F00F; - x4F9493BB = x7F979F7B ^ x30030CC0; - x00 = x4F9493BB & ~a1; - x01 = x00 ^ x694E5A8D; - *out1 ^= x01; - - x6F9CDBFB = x699C585B | x4F9493BB; - x0000DBFB = a6 & x6F9CDBFB; - x00005151 = a2 & x0000DBFB; - x26DAC936 = x694E5A8D ^ x4F9493BB; - x26DA9867 = x00005151 ^ x26DAC936; - - x27DA9877 = x21101013 | x26DA9867; - x27DA438C = x0000DBFB ^ x27DA9877; - x2625C9C9 = a5 ^ x26DAC936; - x27FFCBCD = x27DA438C | x2625C9C9; - x20 = x27FFCBCD & a1; - x21 = x20 ^ x699C585B; - *out3 ^= x21; - - x27FF1036 = x0000DBFB ^ x27FFCBCD; - x27FF103E = x003C003C | x27FF1036; - xB06B6C44 = ~x4F9493BB; - x97947C7A = x27FF103E ^ xB06B6C44; - x10 = x97947C7A & ~a1; - x11 = x10 ^ x26DA9867; - *out2 ^= x11; -} - -__device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; - u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; - u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; - u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; - u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; - u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; - u32 xF700A600, x61008000, x03B7856B, x62B7056B; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x0C0C0C0C = a3 & ~a2; - x0000F0F0 = a5 & ~a3; - x00FFF00F = a4 ^ x0000F0F0; - x00555005 = a1 & x00FFF00F; - x00515001 = x00555005 & ~x0C0C0C0C; - - 
x33000330 = a2 & ~x00FFF00F; - x77555775 = a1 | x33000330; - x30303030 = a2 & ~a3; - x3030CFCF = a5 ^ x30303030; - x30104745 = x77555775 & x3030CFCF; - x30555745 = x00555005 | x30104745; - - xFF000FF0 = ~x00FFF00F; - xCF1048B5 = x30104745 ^ xFF000FF0; - x080A080A = a3 & ~x77555775; - xC71A40BF = xCF1048B5 ^ x080A080A; - xCB164CB3 = x0C0C0C0C ^ xC71A40BF; - x10 = x00515001 | a6; - x11 = x10 ^ xCB164CB3; - *out2 ^= x11; - - x9E4319E6 = a1 ^ xCB164CB3; - x000019E6 = a5 & x9E4319E6; - xF429738C = a2 ^ xC71A40BF; - xF4296A6A = x000019E6 ^ xF429738C; - xC729695A = x33000330 ^ xF4296A6A; - - xC47C3D2F = x30555745 ^ xF4296A6A; - xF77F3F3F = a2 | xC47C3D2F; - x9E43E619 = a5 ^ x9E4319E6; - x693CD926 = xF77F3F3F ^ x9E43E619; - x20 = x30555745 & a6; - x21 = x20 ^ x693CD926; - *out3 ^= x21; - - xF719A695 = x3030CFCF ^ xC729695A; - xF4FF73FF = a4 | xF429738C; - x03E6D56A = xF719A695 ^ xF4FF73FF; - x56B3803F = a1 ^ x03E6D56A; - x30 = x56B3803F & a6; - x31 = x30 ^ xC729695A; - *out4 ^= x31; - - xF700A600 = xF719A695 & ~a4; - x61008000 = x693CD926 & xF700A600; - x03B7856B = x00515001 ^ x03E6D56A; - x62B7056B = x61008000 ^ x03B7856B; - x00 = x62B7056B | a6; - x01 = x00 ^ xC729695A; - *out1 ^= x01; -} - -#endif - -#define SWAP(a, b) { u32 tmp=a;a=b;b=tmp; } - -#define DATASWAP \ - SWAP(D00, D32); \ - SWAP(D01, D33); \ - SWAP(D02, D34); \ - SWAP(D03, D35); \ - SWAP(D04, D36); \ - SWAP(D05, D37); \ - SWAP(D06, D38); \ - SWAP(D07, D39); \ - SWAP(D08, D40); \ - SWAP(D09, D41); \ - SWAP(D10, D42); \ - SWAP(D11, D43); \ - SWAP(D12, D44); \ - SWAP(D13, D45); \ - SWAP(D14, D46); \ - SWAP(D15, D47); \ - SWAP(D16, D48); \ - SWAP(D17, D49); \ - SWAP(D18, D50); \ - SWAP(D19, D51); \ - SWAP(D20, D52); \ - SWAP(D21, D53); \ - SWAP(D22, D54); \ - SWAP(D23, D55); \ - SWAP(D24, D56); \ - SWAP(D25, D57); \ - SWAP(D26, D58); \ - SWAP(D27, D59); \ - SWAP(D28, D60); \ - SWAP(D29, D61); \ - SWAP(D30, D62); \ - SWAP(D31, D63); - -#define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 
= K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; } -#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; } -#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; } -#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; 
k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; } -#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; } -#define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; } -#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; } -#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = 
K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; } -#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; } -#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; } -#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; } -#define KEYSET15 { k00 
= K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; } -#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; } -#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; } -#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; 
k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } -#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; } - -__device__ static void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, 
u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63) -{ - sXXX_DECL u32 s001 = (0x001 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s002 = (0x002 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s004 = (0x004 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s008 = (0x008 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s010 = (0x010 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s020 = (0x020 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s040 = (0x040 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s080 = (0x080 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s100 = (0x100 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s200 = (0x200 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s400 = (0x400 & SALT) ? 0xffffffff : 0; - sXXX_DECL u32 s800 = (0x800 & SALT) ? 0xffffffff : 0; - - KXX_DECL u32 k00, k01, k02, k03, k04, k05; - KXX_DECL u32 k06, k07, k08, k09, k10, k11; - KXX_DECL u32 k12, k13, k14, k15, k16, k17; - KXX_DECL u32 k18, k19, k20, k21, k22, k23; - KXX_DECL u32 k24, k25, k26, k27, k28, k29; - KXX_DECL u32 k30, k31, k32, k33, k34, k35; - KXX_DECL u32 k36, k37, k38, k39, k40, k41; - KXX_DECL u32 k42, k43, k44, k45, k46, k47; - - for (u32 ii = 0; ii < 25; ii++) - { - #if __CUDA_ARCH__ >= 500 - #pragma unroll 1 - #else - #pragma unroll - #endif - - for (u32 i = 0; i < 2; i++) - { - if (i) KEYSET10 else KEYSET00 - - s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30); - s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17); - s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, 
&D15, &D29, &D05); - s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02); - s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18); - s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET11 else KEYSET01 - - s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62); - s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49); - s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34); - s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50); - s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ 
k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET12 else KEYSET02 - - s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30); - s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17); - s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02); - s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18); - s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET13 else KEYSET03 - - s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62); - s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49); - s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(myselx (D15, 
D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34); - s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50); - s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET14 else KEYSET04 - - s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30); - s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17); - s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) ^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02); - s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18); - s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET15 else KEYSET05 - - s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, 
myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62); - s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49); - s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34); - s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50); - s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET16 else KEYSET06 - - s1(myselx (D63, D47, s001) ^ k00, myselx (D32, D48, s002) ^ k01, myselx (D33, D49, s004) ^ k02, myselx (D34, D50, s008) ^ k03, myselx (D35, D51, s010) ^ k04, myselx (D36, D52, s020) ^ k05, &D08, &D16, &D22, &D30); - s2(myselx (D35, D51, s040) ^ k06, myselx (D36, D52, s080) ^ k07, myselx (D37, D53, s100) ^ k08, myselx (D38, D54, s200) ^ k09, myselx (D39, D55, s400) ^ k10, myselx (D40, D56, s800) ^ k11, &D12, &D27, &D01, &D17); - s3( D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4( D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(myselx (D47, D63, s001) ^ k24, myselx (D48, D32, s002) ^ k25, myselx (D49, D33, s004) ^ k26, myselx (D50, D34, s008) ^ k27, myselx (D51, D35, s010) 
^ k28, myselx (D52, D36, s020) ^ k29, &D07, &D13, &D24, &D02); - s6(myselx (D51, D35, s040) ^ k30, myselx (D52, D36, s080) ^ k31, myselx (D53, D37, s100) ^ k32, myselx (D54, D38, s200) ^ k33, myselx (D55, D39, s400) ^ k34, myselx (D56, D40, s800) ^ k35, &D03, &D28, &D10, &D18); - s7( D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8( D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET17 else KEYSET07 - - s1(myselx (D31, D15, s001) ^ k00, myselx (D00, D16, s002) ^ k01, myselx (D01, D17, s004) ^ k02, myselx (D02, D18, s008) ^ k03, myselx (D03, D19, s010) ^ k04, myselx (D04, D20, s020) ^ k05, &D40, &D48, &D54, &D62); - s2(myselx (D03, D19, s040) ^ k06, myselx (D04, D20, s080) ^ k07, myselx (D05, D21, s100) ^ k08, myselx (D06, D22, s200) ^ k09, myselx (D07, D23, s400) ^ k10, myselx (D08, D24, s800) ^ k11, &D44, &D59, &D33, &D49); - s3( D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4( D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(myselx (D15, D31, s001) ^ k24, myselx (D16, D00, s002) ^ k25, myselx (D17, D01, s004) ^ k26, myselx (D18, D02, s008) ^ k27, myselx (D19, D03, s010) ^ k28, myselx (D20, D04, s020) ^ k29, &D39, &D45, &D56, &D34); - s6(myselx (D19, D03, s040) ^ k30, myselx (D20, D04, s080) ^ k31, myselx (D21, D05, s100) ^ k32, myselx (D22, D06, s200) ^ k33, myselx (D23, D07, s400) ^ k34, myselx (D24, D08, s800) ^ k35, &D35, &D60, &D42, &D50); - s7( D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8( D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - } - - DATASWAP; - } - - DATASWAP; -} - -__device__ static void transpose32c (u32 data[32]) -{ - #define swap(x,y,j,m) \ - t = ((x) ^ ((y) >> (j))) & (m); \ - (x) = (x) ^ t; \ - (y) = (y) ^ (t << (j)); - - u32 t; - - swap (data[ 0], 
data[16], 16, 0x0000ffff); - swap (data[ 1], data[17], 16, 0x0000ffff); - swap (data[ 2], data[18], 16, 0x0000ffff); - swap (data[ 3], data[19], 16, 0x0000ffff); - swap (data[ 4], data[20], 16, 0x0000ffff); - swap (data[ 5], data[21], 16, 0x0000ffff); - swap (data[ 6], data[22], 16, 0x0000ffff); - swap (data[ 7], data[23], 16, 0x0000ffff); - swap (data[ 8], data[24], 16, 0x0000ffff); - swap (data[ 9], data[25], 16, 0x0000ffff); - swap (data[10], data[26], 16, 0x0000ffff); - swap (data[11], data[27], 16, 0x0000ffff); - swap (data[12], data[28], 16, 0x0000ffff); - swap (data[13], data[29], 16, 0x0000ffff); - swap (data[14], data[30], 16, 0x0000ffff); - swap (data[15], data[31], 16, 0x0000ffff); - swap (data[ 0], data[ 8], 8, 0x00ff00ff); - swap (data[ 1], data[ 9], 8, 0x00ff00ff); - swap (data[ 2], data[10], 8, 0x00ff00ff); - swap (data[ 3], data[11], 8, 0x00ff00ff); - swap (data[ 4], data[12], 8, 0x00ff00ff); - swap (data[ 5], data[13], 8, 0x00ff00ff); - swap (data[ 6], data[14], 8, 0x00ff00ff); - swap (data[ 7], data[15], 8, 0x00ff00ff); - swap (data[ 0], data[ 4], 4, 0x0f0f0f0f); - swap (data[ 1], data[ 5], 4, 0x0f0f0f0f); - swap (data[ 2], data[ 6], 4, 0x0f0f0f0f); - swap (data[ 3], data[ 7], 4, 0x0f0f0f0f); - swap (data[ 0], data[ 2], 2, 0x33333333); - swap (data[ 1], data[ 3], 2, 0x33333333); - swap (data[ 0], data[ 1], 1, 0x55555555); - swap (data[ 2], data[ 3], 1, 0x55555555); - swap (data[ 4], data[ 6], 2, 0x33333333); - swap (data[ 5], data[ 7], 2, 0x33333333); - swap (data[ 4], data[ 5], 1, 0x55555555); - swap (data[ 6], data[ 7], 1, 0x55555555); - swap (data[ 8], data[12], 4, 0x0f0f0f0f); - swap (data[ 9], data[13], 4, 0x0f0f0f0f); - swap (data[10], data[14], 4, 0x0f0f0f0f); - swap (data[11], data[15], 4, 0x0f0f0f0f); - swap (data[ 8], data[10], 2, 0x33333333); - swap (data[ 9], data[11], 2, 0x33333333); - swap (data[ 8], data[ 9], 1, 0x55555555); - swap (data[10], data[11], 1, 0x55555555); - swap (data[12], data[14], 2, 0x33333333); - swap (data[13], 
data[15], 2, 0x33333333); - swap (data[12], data[13], 1, 0x55555555); - swap (data[14], data[15], 1, 0x55555555); - swap (data[16], data[24], 8, 0x00ff00ff); - swap (data[17], data[25], 8, 0x00ff00ff); - swap (data[18], data[26], 8, 0x00ff00ff); - swap (data[19], data[27], 8, 0x00ff00ff); - swap (data[20], data[28], 8, 0x00ff00ff); - swap (data[21], data[29], 8, 0x00ff00ff); - swap (data[22], data[30], 8, 0x00ff00ff); - swap (data[23], data[31], 8, 0x00ff00ff); - swap (data[16], data[20], 4, 0x0f0f0f0f); - swap (data[17], data[21], 4, 0x0f0f0f0f); - swap (data[18], data[22], 4, 0x0f0f0f0f); - swap (data[19], data[23], 4, 0x0f0f0f0f); - swap (data[16], data[18], 2, 0x33333333); - swap (data[17], data[19], 2, 0x33333333); - swap (data[16], data[17], 1, 0x55555555); - swap (data[18], data[19], 1, 0x55555555); - swap (data[20], data[22], 2, 0x33333333); - swap (data[21], data[23], 2, 0x33333333); - swap (data[20], data[21], 1, 0x55555555); - swap (data[22], data[23], 1, 0x55555555); - swap (data[24], data[28], 4, 0x0f0f0f0f); - swap (data[25], data[29], 4, 0x0f0f0f0f); - swap (data[26], data[30], 4, 0x0f0f0f0f); - swap (data[27], data[31], 4, 0x0f0f0f0f); - swap (data[24], data[26], 2, 0x33333333); - swap (data[25], data[27], 2, 0x33333333); - swap (data[24], data[25], 1, 0x55555555); - swap (data[26], data[27], 1, 0x55555555); - swap (data[28], data[30], 2, 0x33333333); - swap (data[29], data[31], 2, 0x33333333); - swap (data[28], data[29], 1, 0x55555555); - swap (data[30], data[31], 1, 0x55555555); -} - -__device__ static void m01500m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const 
salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - #ifdef DESCRYPT_SALT - const u32 salt = DESCRYPT_SALT; - #else - const u32 salt = salt_bufs[salt_pos].salt_buf[0]; - #endif - - /** - * keys - */ - - const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe; - const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe; - - const u32 K00 = -((w0s >> ( 0 + 7)) & 1); - const u32 K01 = -((w0s >> ( 0 + 6)) & 1); - const u32 K02 = -((w0s >> ( 0 + 5)) & 1); - const u32 K03 = -((w0s >> ( 0 + 4)) & 1); - const u32 K04 = -((w0s >> ( 0 + 3)) & 1); - const u32 K05 = -((w0s >> ( 0 + 2)) & 1); - const u32 K06 = -((w0s >> ( 0 + 1)) & 1); - const u32 K07 = -((w0s >> ( 8 + 7)) & 1); - const u32 K08 = -((w0s >> ( 8 + 6)) & 1); - const u32 K09 = -((w0s >> ( 8 + 5)) & 1); - const u32 K10 = -((w0s >> ( 8 + 4)) & 1); - const u32 K11 = -((w0s >> ( 8 + 3)) & 1); - const u32 K12 = -((w0s >> ( 8 + 2)) & 1); - const u32 K13 = -((w0s >> ( 8 + 1)) & 1); - const u32 K14 = -((w0s >> (16 + 7)) & 1); - const u32 K15 = -((w0s >> (16 + 6)) & 1); - const u32 K16 = -((w0s >> (16 + 5)) & 1); - const u32 K17 = -((w0s >> (16 + 4)) & 1); - const u32 K18 = -((w0s >> (16 + 3)) & 1); - const u32 K19 = -((w0s >> (16 + 2)) & 1); - const u32 K20 = -((w0s >> (16 + 1)) & 1); - const u32 K21 = -((w0s >> (24 + 7)) & 1); - const u32 K22 = -((w0s >> (24 + 6)) & 1); - const u32 K23 = -((w0s >> (24 + 5)) & 1); - const u32 K24 = -((w0s >> (24 + 4)) & 1); - const u32 K25 = -((w0s >> (24 + 3)) & 1); - const u32 K26 = -((w0s >> (24 + 2)) & 1); - const u32 K27 = -((w0s >> (24 + 1)) & 1); - const u32 K28 = -((w1s >> ( 0 + 7)) & 1); - const u32 K29 = -((w1s >> ( 0 + 6)) & 
1); - const u32 K30 = -((w1s >> ( 0 + 5)) & 1); - const u32 K31 = -((w1s >> ( 0 + 4)) & 1); - const u32 K32 = -((w1s >> ( 0 + 3)) & 1); - const u32 K33 = -((w1s >> ( 0 + 2)) & 1); - const u32 K34 = -((w1s >> ( 0 + 1)) & 1); - const u32 K35 = -((w1s >> ( 8 + 7)) & 1); - const u32 K36 = -((w1s >> ( 8 + 6)) & 1); - const u32 K37 = -((w1s >> ( 8 + 5)) & 1); - const u32 K38 = -((w1s >> ( 8 + 4)) & 1); - const u32 K39 = -((w1s >> ( 8 + 3)) & 1); - const u32 K40 = -((w1s >> ( 8 + 2)) & 1); - const u32 K41 = -((w1s >> ( 8 + 1)) & 1); - const u32 K42 = -((w1s >> (16 + 7)) & 1); - const u32 K43 = -((w1s >> (16 + 6)) & 1); - const u32 K44 = -((w1s >> (16 + 5)) & 1); - const u32 K45 = -((w1s >> (16 + 4)) & 1); - const u32 K46 = -((w1s >> (16 + 3)) & 1); - const u32 K47 = -((w1s >> (16 + 2)) & 1); - const u32 K48 = -((w1s >> (16 + 1)) & 1); - const u32 K49 = -((w1s >> (24 + 7)) & 1); - const u32 K50 = -((w1s >> (24 + 6)) & 1); - const u32 K51 = -((w1s >> (24 + 5)) & 1); - const u32 K52 = -((w1s >> (24 + 4)) & 1); - const u32 K53 = -((w1s >> (24 + 3)) & 1); - const u32 K54 = -((w1s >> (24 + 2)) & 1); - const u32 K55 = -((w1s >> (24 + 1)) & 1); - - /** - * loop - */ - - const u32 bf_loops = bfs_cnt; - - for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++) - { - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - - k00 |= c_tm[pc_pos].b[ 0]; - k01 |= c_tm[pc_pos].b[ 1]; - k02 |= c_tm[pc_pos].b[ 2]; - k03 |= c_tm[pc_pos].b[ 3]; - k04 |= c_tm[pc_pos].b[ 4]; - k05 |= c_tm[pc_pos].b[ 5]; - k06 |= c_tm[pc_pos].b[ 6]; - k07 |= c_tm[pc_pos].b[ 7]; - 
k08 |= c_tm[pc_pos].b[ 8]; - k09 |= c_tm[pc_pos].b[ 9]; - k10 |= c_tm[pc_pos].b[10]; - k11 |= c_tm[pc_pos].b[11]; - k12 |= c_tm[pc_pos].b[12]; - k13 |= c_tm[pc_pos].b[13]; - k14 |= c_tm[pc_pos].b[14]; - k15 |= c_tm[pc_pos].b[15]; - k16 |= c_tm[pc_pos].b[16]; - k17 |= c_tm[pc_pos].b[17]; - k18 |= c_tm[pc_pos].b[18]; - k19 |= c_tm[pc_pos].b[19]; - k20 |= c_tm[pc_pos].b[20]; - k21 |= c_tm[pc_pos].b[21]; - k22 |= c_tm[pc_pos].b[22]; - k23 |= c_tm[pc_pos].b[23]; - k24 |= c_tm[pc_pos].b[24]; - k25 |= c_tm[pc_pos].b[25]; - k26 |= c_tm[pc_pos].b[26]; - k27 |= c_tm[pc_pos].b[27]; - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0; - u32 D04 = 0; - u32 D05 = 0; - u32 D06 = 0; - u32 D07 = 0; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0; - u32 D17 = 0; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0; - u32 D23 = 0; - u32 D24 = 0; - u32 D25 = 0; - u32 D26 = 0; - u32 D27 = 0; - u32 D28 = 0; - u32 D29 = 0; - u32 D30 = 0; - u32 D31 = 0; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0; - u32 D41 = 0; - u32 D42 = 0; - u32 D43 = 0; - u32 D44 = 0; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0; - u32 D62 = 0; - u32 D63 = 0; - - DESCrypt - ( - salt, - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, 
D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 - ); - - u32 out[64]; - - out[ 0] = D00; - out[ 1] = D01; - out[ 2] = D02; - out[ 3] = D03; - out[ 4] = D04; - out[ 5] = D05; - out[ 6] = D06; - out[ 7] = D07; - out[ 8] = D08; - out[ 9] = D09; - out[10] = D10; - out[11] = D11; - out[12] = D12; - out[13] = D13; - out[14] = D14; - out[15] = D15; - out[16] = D16; - out[17] = D17; - out[18] = D18; - out[19] = D19; - out[20] = D20; - out[21] = D21; - out[22] = D22; - out[23] = D23; - out[24] = D24; - out[25] = D25; - out[26] = D26; - out[27] = D27; - out[28] = D28; - out[29] = D29; - out[30] = D30; - out[31] = D31; - out[32] = D32; - out[33] = D33; - out[34] = D34; - out[35] = D35; - out[36] = D36; - out[37] = D37; - out[38] = D38; - out[39] = D39; - out[40] = D40; - out[41] = D41; - out[42] = D42; - out[43] = D43; - out[44] = D44; - out[45] = D45; - out[46] = D46; - out[47] = D47; - out[48] = D48; - out[49] = D49; - out[50] = D50; - out[51] = D51; - out[52] = D52; - out[53] = D53; - out[54] = D54; - out[55] = D55; - out[56] = D56; - out[57] = D57; - out[58] = D58; - out[59] = D59; - out[60] = D60; - out[61] = D61; - out[62] = D62; - out[63] = D63; - - if (digests_cnt < 16) - { - for (u32 d = 0; d < digests_cnt; d++) - { - const u32 final_hash_pos = digests_offset + d; - - if (hashes_shown[final_hash_pos]) continue; - - u32 search[2]; - - search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; - search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; - - u32 tmpResult = 0; - - #pragma unroll - for (int i = 0; i < 32; i++) - { - const u32 b0 = -((search[0] >> i) & 1); - const u32 b1 = -((search[1] >> i) & 1); - - tmpResult |= out[ 0 + i] ^ b0; - tmpResult |= out[32 + i] ^ b1; - } - - if (tmpResult == 0xffffffff) continue; - - const u32 slice = 31 - __clz (~tmpResult); - - const u32x r0 = search[0]; - const u32x r1 = 
search[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } - else - { - u32 out0[32]; - u32 out1[32]; - - #pragma unroll - for (int i = 0; i < 32; i++) - { - out0[i] = out[ 0 + 31 - i]; - out1[i] = out[32 + 31 - i]; - } - - transpose32c (out0); - transpose32c (out1); - - #pragma unroll - for (int slice = 0; slice < 32; slice++) - { - const u32x r0 = out0[31 - slice]; - const u32x r1 = out1[31 - slice]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } - } -} - -__device__ static void m01500s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - #ifdef DESCRYPT_SALT - const u32 salt = DESCRYPT_SALT; - #else - const u32 salt = salt_bufs[salt_pos].salt_buf[0]; - #endif - - /** - * digest - */ - - #define S00 s_S[ 0] - #define S01 s_S[ 1] - #define S02 s_S[ 2] - #define S03 s_S[ 3] - #define S04 s_S[ 4] - #define S05 s_S[ 5] - #define S06 s_S[ 6] - #define S07 s_S[ 7] - #define S08 s_S[ 8] - #define S09 s_S[ 9] - #define S10 s_S[10] - #define S11 s_S[11] - #define S12 s_S[12] - #define S13 s_S[13] - #define S14 s_S[14] - #define S15 s_S[15] - #define S16 s_S[16] - #define S17 s_S[17] - #define S18 s_S[18] - #define S19 
s_S[19] - #define S20 s_S[20] - #define S21 s_S[21] - #define S22 s_S[22] - #define S23 s_S[23] - #define S24 s_S[24] - #define S25 s_S[25] - #define S26 s_S[26] - #define S27 s_S[27] - #define S28 s_S[28] - #define S29 s_S[29] - #define S30 s_S[30] - #define S31 s_S[31] - #define S32 s_S[32] - #define S33 s_S[33] - #define S34 s_S[34] - #define S35 s_S[35] - #define S36 s_S[36] - #define S37 s_S[37] - #define S38 s_S[38] - #define S39 s_S[39] - #define S40 s_S[40] - #define S41 s_S[41] - #define S42 s_S[42] - #define S43 s_S[43] - #define S44 s_S[44] - #define S45 s_S[45] - #define S46 s_S[46] - #define S47 s_S[47] - #define S48 s_S[48] - #define S49 s_S[49] - #define S50 s_S[50] - #define S51 s_S[51] - #define S52 s_S[52] - #define S53 s_S[53] - #define S54 s_S[54] - #define S55 s_S[55] - #define S56 s_S[56] - #define S57 s_S[57] - #define S58 s_S[58] - #define S59 s_S[59] - #define S60 s_S[60] - #define S61 s_S[61] - #define S62 s_S[62] - #define S63 s_S[63] - - /** - * keys - */ - - const u32 w0s = (pws[gid].i[0] << 1) & 0xfefefefe; - const u32 w1s = (pws[gid].i[1] << 1) & 0xfefefefe; - - const u32 K00 = -((w0s >> ( 0 + 7)) & 1); - const u32 K01 = -((w0s >> ( 0 + 6)) & 1); - const u32 K02 = -((w0s >> ( 0 + 5)) & 1); - const u32 K03 = -((w0s >> ( 0 + 4)) & 1); - const u32 K04 = -((w0s >> ( 0 + 3)) & 1); - const u32 K05 = -((w0s >> ( 0 + 2)) & 1); - const u32 K06 = -((w0s >> ( 0 + 1)) & 1); - const u32 K07 = -((w0s >> ( 8 + 7)) & 1); - const u32 K08 = -((w0s >> ( 8 + 6)) & 1); - const u32 K09 = -((w0s >> ( 8 + 5)) & 1); - const u32 K10 = -((w0s >> ( 8 + 4)) & 1); - const u32 K11 = -((w0s >> ( 8 + 3)) & 1); - const u32 K12 = -((w0s >> ( 8 + 2)) & 1); - const u32 K13 = -((w0s >> ( 8 + 1)) & 1); - const u32 K14 = -((w0s >> (16 + 7)) & 1); - const u32 K15 = -((w0s >> (16 + 6)) & 1); - const u32 K16 = -((w0s >> (16 + 5)) & 1); - const u32 K17 = -((w0s >> (16 + 4)) & 1); - const u32 K18 = -((w0s >> (16 + 3)) & 1); - const u32 K19 = -((w0s >> (16 + 2)) & 1); - const u32 
K20 = -((w0s >> (16 + 1)) & 1); - const u32 K21 = -((w0s >> (24 + 7)) & 1); - const u32 K22 = -((w0s >> (24 + 6)) & 1); - const u32 K23 = -((w0s >> (24 + 5)) & 1); - const u32 K24 = -((w0s >> (24 + 4)) & 1); - const u32 K25 = -((w0s >> (24 + 3)) & 1); - const u32 K26 = -((w0s >> (24 + 2)) & 1); - const u32 K27 = -((w0s >> (24 + 1)) & 1); - const u32 K28 = -((w1s >> ( 0 + 7)) & 1); - const u32 K29 = -((w1s >> ( 0 + 6)) & 1); - const u32 K30 = -((w1s >> ( 0 + 5)) & 1); - const u32 K31 = -((w1s >> ( 0 + 4)) & 1); - const u32 K32 = -((w1s >> ( 0 + 3)) & 1); - const u32 K33 = -((w1s >> ( 0 + 2)) & 1); - const u32 K34 = -((w1s >> ( 0 + 1)) & 1); - const u32 K35 = -((w1s >> ( 8 + 7)) & 1); - const u32 K36 = -((w1s >> ( 8 + 6)) & 1); - const u32 K37 = -((w1s >> ( 8 + 5)) & 1); - const u32 K38 = -((w1s >> ( 8 + 4)) & 1); - const u32 K39 = -((w1s >> ( 8 + 3)) & 1); - const u32 K40 = -((w1s >> ( 8 + 2)) & 1); - const u32 K41 = -((w1s >> ( 8 + 1)) & 1); - const u32 K42 = -((w1s >> (16 + 7)) & 1); - const u32 K43 = -((w1s >> (16 + 6)) & 1); - const u32 K44 = -((w1s >> (16 + 5)) & 1); - const u32 K45 = -((w1s >> (16 + 4)) & 1); - const u32 K46 = -((w1s >> (16 + 3)) & 1); - const u32 K47 = -((w1s >> (16 + 2)) & 1); - const u32 K48 = -((w1s >> (16 + 1)) & 1); - const u32 K49 = -((w1s >> (24 + 7)) & 1); - const u32 K50 = -((w1s >> (24 + 6)) & 1); - const u32 K51 = -((w1s >> (24 + 5)) & 1); - const u32 K52 = -((w1s >> (24 + 4)) & 1); - const u32 K53 = -((w1s >> (24 + 3)) & 1); - const u32 K54 = -((w1s >> (24 + 2)) & 1); - const u32 K55 = -((w1s >> (24 + 1)) & 1); - - /** - * loop - */ - - const u32 bf_loops = bfs_cnt; - - for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++) - { - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = 
K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - - k00 |= c_tm[pc_pos].b[ 0]; - k01 |= c_tm[pc_pos].b[ 1]; - k02 |= c_tm[pc_pos].b[ 2]; - k03 |= c_tm[pc_pos].b[ 3]; - k04 |= c_tm[pc_pos].b[ 4]; - k05 |= c_tm[pc_pos].b[ 5]; - k06 |= c_tm[pc_pos].b[ 6]; - k07 |= c_tm[pc_pos].b[ 7]; - k08 |= c_tm[pc_pos].b[ 8]; - k09 |= c_tm[pc_pos].b[ 9]; - k10 |= c_tm[pc_pos].b[10]; - k11 |= c_tm[pc_pos].b[11]; - k12 |= c_tm[pc_pos].b[12]; - k13 |= c_tm[pc_pos].b[13]; - k14 |= c_tm[pc_pos].b[14]; - k15 |= c_tm[pc_pos].b[15]; - k16 |= c_tm[pc_pos].b[16]; - k17 |= c_tm[pc_pos].b[17]; - k18 |= c_tm[pc_pos].b[18]; - k19 |= c_tm[pc_pos].b[19]; - k20 |= c_tm[pc_pos].b[20]; - k21 |= c_tm[pc_pos].b[21]; - k22 |= c_tm[pc_pos].b[22]; - k23 |= c_tm[pc_pos].b[23]; - k24 |= c_tm[pc_pos].b[24]; - k25 |= c_tm[pc_pos].b[25]; - k26 |= c_tm[pc_pos].b[26]; - k27 |= c_tm[pc_pos].b[27]; - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0; - u32 D04 = 0; - u32 D05 = 0; - u32 D06 = 0; - u32 D07 = 0; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0; - u32 D17 = 0; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0; - u32 D23 = 0; - u32 D24 = 0; - u32 D25 = 0; - u32 D26 = 0; - u32 D27 = 0; - u32 D28 = 0; - u32 D29 = 0; - u32 D30 = 0; - u32 D31 = 0; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0; - u32 D41 = 0; - u32 D42 = 0; - u32 D43 = 0; - u32 D44 = 0; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0; - u32 D62 = 0; - u32 D63 = 0; - - DESCrypt - ( - salt, - k00, 
k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - K28, K29, K30, K31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 - ); - - u32 tmpResult = 0; - - tmpResult |= D00 ^ S00; - tmpResult |= D01 ^ S01; - tmpResult |= D02 ^ S02; - tmpResult |= D03 ^ S03; - tmpResult |= D04 ^ S04; - tmpResult |= D05 ^ S05; - tmpResult |= D06 ^ S06; - tmpResult |= D07 ^ S07; - tmpResult |= D08 ^ S08; - tmpResult |= D09 ^ S09; - tmpResult |= D10 ^ S10; - tmpResult |= D11 ^ S11; - tmpResult |= D12 ^ S12; - tmpResult |= D13 ^ S13; - tmpResult |= D14 ^ S14; - tmpResult |= D15 ^ S15; - tmpResult |= D16 ^ S16; - tmpResult |= D17 ^ S17; - tmpResult |= D18 ^ S18; - tmpResult |= D19 ^ S19; - tmpResult |= D20 ^ S20; - tmpResult |= D21 ^ S21; - tmpResult |= D22 ^ S22; - tmpResult |= D23 ^ S23; - tmpResult |= D24 ^ S24; - tmpResult |= D25 ^ S25; - tmpResult |= D26 ^ S26; - tmpResult |= D27 ^ S27; - tmpResult |= D28 ^ S28; - tmpResult |= D29 ^ S29; - tmpResult |= D30 ^ S30; - tmpResult |= D31 ^ S31; - tmpResult |= D32 ^ S32; - tmpResult |= D33 ^ S33; - tmpResult |= D34 ^ S34; - tmpResult |= D35 ^ S35; - tmpResult |= D36 ^ S36; - tmpResult |= D37 ^ S37; - tmpResult |= D38 ^ S38; - tmpResult |= D39 ^ S39; - tmpResult |= D40 ^ S40; - tmpResult |= D41 ^ S41; - tmpResult |= D42 ^ S42; - tmpResult |= D43 ^ S43; - tmpResult |= D44 ^ S44; - tmpResult |= D45 ^ S45; - tmpResult |= D46 ^ S46; - tmpResult |= D47 ^ S47; - - if (tmpResult == 0xffffffff) continue; - - tmpResult |= D48 ^ S48; - tmpResult |= D49 ^ S49; 
- tmpResult |= D50 ^ S50; - tmpResult |= D51 ^ S51; - tmpResult |= D52 ^ S52; - tmpResult |= D53 ^ S53; - tmpResult |= D54 ^ S54; - tmpResult |= D55 ^ S55; - tmpResult |= D56 ^ S56; - tmpResult |= D57 ^ S57; - tmpResult |= D58 ^ S58; - tmpResult |= D59 ^ S59; - tmpResult |= D60 ^ S60; - tmpResult |= D61 ^ S61; - tmpResult |= D62 ^ S62; - tmpResult |= D63 ^ S63; - - if (tmpResult == 0xffffffff) continue; - - const u32 slice = 31 - __clz (~tmpResult); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_tb (pw_t *pws) -{ - // not used here, inlined code -} - -extern "C" __global__ void __launch_bounds__ (32, 1) m01500_tm (const u32 *d_bfs, bs_word_t *d_tm) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - const u32 block = gid / 32; - const u32 slice = gid % 32; - - const u32 w0 = c_bfs[gid]; - - const u32 w0s = (w0 << 1) & 0xfefefefe; - - #pragma unroll - for (int i = 0, j = 0; i < 32; i += 8, j += 7) - { - atomicOr (&d_tm[block].b[j + 0], (((w0s >> (i + 7)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 1], (((w0s >> (i + 6)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 2], (((w0s >> (i + 5)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 3], (((w0s >> (i + 4)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 4], (((w0s >> (i + 3)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 5], (((w0s >> (i + 2)) & 1) << slice)); - atomicOr (&d_tm[block].b[j + 6], (((w0s >> (i + 1)) & 1) << slice)); - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - const u32 s0 = digests_buf[digests_offset].digest_buf[0]; - const u32 s1 = digests_buf[digests_offset].digest_buf[1]; - - if (lid < 32) - { - s_S[lid] = -((s0 >> lid - 0) & 1); - } - else if (lid < 64) - { - s_S[lid] = -((s1 >> lid - 32) & 1); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m01500m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - const u32 s0 = digests_buf[digests_offset].digest_buf[0]; - const u32 s1 = digests_buf[digests_offset].digest_buf[1]; - - if (lid < 32) - { - s_S[lid] = -((s0 >> lid - 0) & 1); - } - else if (lid < 64) - { - 
s_S[lid] = -((s1 >> lid - 32) & 1); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m01500s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m01500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01600.cu b/nv/m01600.cu deleted file mode 100644 index c4ad2ef..0000000 --- a/nv/m01600.cu +++ /dev/null @@ -1,1187 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define md5apr1_magic0 0x72706124 -#define md5apr1_magic1 0x00002431 - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP 
(MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, 
c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = 
append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; 
- u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0x80, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0x80; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; 
- break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - break; - 
case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - break; - case 10: block2[2] |= tmp0; - block2[3] = tmp1; - block3[0] = tmp2; - break; - case 11: block2[3] |= tmp0; - block3[0] = tmp1; - block3[1] = tmp2; - break; - } - - return; -} - -__device__ static void append_sign (u32x block0[4], u32x block1[4], const u32 block_len) -{ - switch (block_len) - { - case 0: - block0[0] = md5apr1_magic0; - block0[1] = md5apr1_magic1; - break; - - case 1: - block0[0] = block0[0] | md5apr1_magic0 << 8; - block0[1] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8; - block0[2] = md5apr1_magic1 >> 24; - break; - - case 2: - block0[0] = block0[0] | md5apr1_magic0 << 16; - block0[1] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16; - block0[2] = md5apr1_magic1 >> 16; - break; - - case 3: - block0[0] = block0[0] | md5apr1_magic0 << 24; - block0[1] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24; - block0[2] = md5apr1_magic1 >> 8; - break; - - case 4: - block0[1] = md5apr1_magic0; - block0[2] = md5apr1_magic1; - break; - - case 5: - block0[1] = block0[1] | md5apr1_magic0 << 8; - block0[2] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8; - block0[3] = md5apr1_magic1 >> 24; - break; - - case 6: - block0[1] = block0[1] | md5apr1_magic0 << 16; - block0[2] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16; - block0[3] = md5apr1_magic1 >> 16; - break; - - case 7: - block0[1] = block0[1] | md5apr1_magic0 << 24; - block0[2] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24; - block0[3] = md5apr1_magic1 >> 8; - break; - - case 8: - block0[2] = md5apr1_magic0; - block0[3] = md5apr1_magic1; - break; - - case 9: - block0[2] = block0[2] | md5apr1_magic0 << 8; - block0[3] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8; - block1[0] = md5apr1_magic1 >> 24; - break; - - case 10: 
- block0[2] = block0[2] | md5apr1_magic0 << 16; - block0[3] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16; - block1[0] = md5apr1_magic1 >> 16; - break; - - case 11: - block0[2] = block0[2] | md5apr1_magic0 << 24; - block0[3] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24; - block1[0] = md5apr1_magic1 >> 8; - break; - - case 12: - block0[3] = md5apr1_magic0; - block1[0] = md5apr1_magic1; - break; - - case 13: - block0[3] = block0[3] | md5apr1_magic0 << 8; - block1[0] = md5apr1_magic0 >> 24 | md5apr1_magic1 << 8; - block1[1] = md5apr1_magic1 >> 24; - break; - - case 14: - block0[3] = block0[3] | md5apr1_magic0 << 16; - block1[0] = md5apr1_magic0 >> 16 | md5apr1_magic1 << 16; - block1[1] = md5apr1_magic1 >> 16; - break; - - case 15: - block0[3] = block0[3] | md5apr1_magic0 << 24; - block1[0] = md5apr1_magic0 >> 8 | md5apr1_magic1 << 24; - block1[1] = md5apr1_magic1 >> 8; - break; - } -} - -__device__ static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) -{ - switch (block_len) - { - case 0: - block0[0] = append; - break; - - case 1: - block0[0] = block0[0] | append << 8; - break; - - case 2: - block0[0] = block0[0] | append << 16; - break; - - case 3: - block0[0] = block0[0] | append << 24; - break; - - case 4: - block0[1] = append; - break; - - case 5: - block0[1] = block0[1] | append << 8; - break; - - case 6: - block0[1] = block0[1] | append << 16; - break; - - case 7: - block0[1] = block0[1] | append << 24; - break; - - case 8: - block0[2] = append; - break; - - case 9: - block0[2] = block0[2] | append << 8; - break; - - case 10: - block0[2] = block0[2] | append << 16; - break; - - case 11: - block0[2] = block0[2] | append << 24; - break; - - case 12: - block0[3] = append; - break; - - case 13: - block0[3] = block0[3] | append << 8; - break; - - case 14: - block0[3] = block0[3] | append << 16; - break; - - case 15: - block0[3] = block0[3] | append << 24; - break; - - case 16: - block1[0] = 
append; - break; - - case 17: - block1[0] = block1[0] | append << 8; - break; - - case 18: - block1[0] = block1[0] | append << 16; - break; - - case 19: - block1[0] = block1[0] | append << 24; - break; - - case 20: - block1[1] = append; - break; - - case 21: - block1[1] = block1[1] | append << 8; - break; - - case 22: - block1[1] = block1[1] | append << 16; - break; - - case 23: - block1[1] = block1[1] | append << 24; - break; - - case 24: - block1[2] = append; - break; - - case 25: - block1[2] = block1[2] | append << 8; - break; - - case 26: - block1[2] = block1[2] | append << 16; - break; - - case 27: - block1[2] = block1[2] | append << 24; - break; - - case 28: - block1[3] = append; - break; - - case 29: - block1[3] = block1[3] | append << 8; - break; - - case 30: - block1[3] = block1[3] | append << 16; - break; - - case 31: - block1[3] = block1[3] | append << 24; - break; - - case 32: - block2[0] = append; - break; - - case 33: - block2[0] = block2[0] | append << 8; - break; - - case 34: - block2[0] = block2[0] | append << 16; - break; - - case 35: - block2[0] = block2[0] | append << 24; - break; - - case 36: - block2[1] = append; - break; - - case 37: - block2[1] = block2[1] | append << 8; - break; - - case 38: - block2[1] = block2[1] | append << 16; - break; - - case 39: - block2[1] = block2[1] | append << 24; - break; - - case 40: - block2[2] = append; - break; - - case 41: - block2[2] = block2[2] | append << 8; - break; - - case 42: - block2[2] = block2[2] | append << 16; - break; - - case 43: - block2[2] = block2[2] | append << 24; - break; - - case 44: - block2[3] = append; - break; - - case 45: - block2[3] = block2[3] | append << 8; - break; - - case 46: - block2[3] = block2[3] | append << 16; - break; - - case 47: - block2[3] = block2[3] | append << 24; - break; - - case 48: - block3[0] = append; - break; - - case 49: - block3[0] = block3[0] | append << 8; - break; - - case 50: - block3[0] = block3[0] | append << 16; - break; - - case 51: - block3[0] = 
block3[0] | append << 24; - break; - - case 52: - block3[1] = append; - break; - - case 53: - block3[1] = block3[1] | append << 8; - break; - - case 54: - block3[1] = block3[1] | append << 16; - break; - - case 55: - block3[1] = block3[1] | append << 24; - break; - - case 56: - block3[2] = append; - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01600_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * init - */ - - //memcat16 (block0, block1, block2, block3, block_len, w0); - //block_len += pw_len; - - u32 block_len = pw_len; - - u32x block0[4]; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 
0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - /* The password first, since that is what is most unknown */ - /* Then our magic string */ - /* Then the raw salt */ - /* Then just as many characters of the MD5(pw,salt,pw) */ - - //memcat16 (block0, block1, block2, block3, block_len, w); - //block_len += pw_len; - - block_len = pw_len; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - append_sign (block0, block1, block_len); - - block_len += 6; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - truncate_block (digest, pw_len); - - memcat16 (block0, block1, block2, block3, block_len, digest); - - block_len += pw_len; - - /* Then something really weird... 
*/ - - u32x append = block0[0] & 0xFF; - - for (u32 j = pw_len; j; j >>= 1) - { - if ((j & 1) == 0) - { - append_1st (block0, block1, block2, block3, block_len, append); - } - - block_len++; - } - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01600_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0_x80[4]; - - w0_x80[0] = w0[0]; - w0_x80[1] = w0[1]; - w0_x80[2] = w0[2]; - w0_x80[3] = w0[3]; - - append_0x80_1 (w0_x80, pw_len); - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const 
u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - u32x digest[4]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - - /** - * loop - */ - - /* and now, just to make sure things don't run too fast */ - - u32 block_len; - - u32x block0[4]; - - block0[0] = 0; - block0[1] = 0; - block0[2] = 0; - block0[3] = 0; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - block3[0] = 0; - block3[1] = 0; - - const u32 j1 = (j & 1) ? 1 : 0; - const u32 j3 = (j % 3) ? 1 : 0; - const u32 j7 = (j % 7) ? 
1 : 0; - - if (j1) - { - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block_len = pw_len; - - if (j3) - { - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - } - - if (j7) - { - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - - memcat16_x80 (block0, block1, block2, block3, block_len, digest); - - block_len += 16; - } - else - { - block0[0] = digest[0]; - block0[1] = digest[1]; - block0[2] = digest[2]; - block0[3] = digest[3]; - - block_len = 16; - - if (j3 && j7) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - else if (j3) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - } - else if (j7) - { - block1[0] = w0[0]; - block1[1] = w0[1]; - block1[2] = w0[2]; - block1[3] = w0[3]; - - block_len += pw_len; - } - - memcat16 (block0, block1, block2, block3, block_len, w0_x80); - - block_len += pw_len; - } - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01600_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m01700_a0.cu b/nv/m01700_a0.cu deleted file mode 100644 index a4d7ea8..0000000 --- a/nv/m01700_a0.cu +++ /dev/null @@ -1,431 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, 
- SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, 
w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - 
digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] 
= 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - 
pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, 
w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01700_a1.cu b/nv/m01700_a1.cu deleted file mode 100644 index cfdc5ad..0000000 --- 
a/nv/m01700_a1.cu +++ /dev/null @@ -1,529 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], 
u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - 
SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const 
u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] 
= wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const 
u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) 
- { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01700_a3.cu b/nv/m01700_a3.cu deleted file mode 100644 index 0525f34..0000000 --- a/nv/m01700_a3.cu +++ /dev/null @@ -1,540 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - 
-#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x 
w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 
8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -__device__ static void m01700m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - const u32 bf_loops = ceil 
((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01700s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - 
digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; 
- w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = 
pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700s (w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01700s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, 
digests_cnt, digests_offset); -} diff --git a/nv/m01710_a0.cu b/nv/m01710_a0.cu deleted file mode 100644 index 6c01148..0000000 --- a/nv/m01710_a0.cu +++ /dev/null @@ -1,583 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, 
SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 
2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = 
salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 out_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] |= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m08 
(const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = 
pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 out_salt_len = out_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] |= s3[2]; - w3[3] |= s3[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, 
digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01710_a1.cu b/nv/m01710_a1.cu deleted file mode 100644 index 1713fe4..0000000 --- a/nv/m01710_a1.cu 
+++ /dev/null @@ -1,637 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ 
- u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP 
(SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = 
c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = wordl3[2] | wordr3[2] | s3[2]; - w3[3] = wordl3[3] | wordr3[3] | s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = 
swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == 
COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - 
s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - w1[0] = wordl1[0] | wordr1[0] | s1[0]; - w1[1] = wordl1[1] | wordr1[1] | s1[1]; - w1[2] = wordl1[2] | wordr1[2] | s1[2]; - w1[3] = wordl1[3] | wordr1[3] | s1[3]; - w2[0] = wordl2[0] | wordr2[0] | s2[0]; - w2[1] = wordl2[1] | wordr2[1] | s2[1]; - w2[2] = wordl2[2] | wordr2[2] | s2[2]; - w2[3] = wordl2[3] | wordr2[3] | s2[3]; - w3[0] = wordl3[0] | wordr3[0] | s3[0]; - w3[1] = wordl3[1] | wordr3[1] | s3[1]; - w3[2] = wordl3[2] | wordr3[2] | s3[2]; - w3[3] = wordl3[3] | wordr3[3] | s3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - 
const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01710_a3.cu b/nv/m01710_a3.cu deleted file mode 100644 index e995495..0000000 --- a/nv/m01710_a3.cu +++ /dev/null @@ -1,597 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - 
-#define _SHA512_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t 
= hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, 
k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -__device__ static void m01710m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; 
il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01710s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 
4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = 
pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01710s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01720_a0.cu b/nv/m01720_a0.cu deleted file 
mode 100644 index cb26aac..0000000 --- a/nv/m01720_a0.cu +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void 
sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - 
SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= 
salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; 
- w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, out_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01720_a1.cu b/nv/m01720_a1.cu deleted file mode 100644 index d6bc884..0000000 --- a/nv/m01720_a1.cu +++ /dev/null @@ -1,587 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - 
SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - 
w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); 
- - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, 
wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = 
pw_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 
lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] 
= c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - switch_buffer_by_offset (w0, w1, w2, w3, salt_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - /** - * sha512 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x 
digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01720_a3.cu b/nv/m01720_a3.cu deleted file mode 100644 index 55b879b..0000000 --- a/nv/m01720_a3.cu +++ /dev/null @@ -1,749 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, 
SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, 
SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -__device__ static void m01720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, 
const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = 
swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * 
prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - 
digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_m16 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01730_a0.cu b/nv/m01730_a0.cu deleted file mode 100644 index 3f7b9a9..0000000 --- a/nv/m01730_a0.cu +++ /dev/null @@ -1,583 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; 
- u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); 
\ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = 
pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; 
- w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - 
if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - 
s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (out_len * 2)); - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01730_a1.cu b/nv/m01730_a1.cu deleted file mode 100644 index 4c3e300..0000000 --- a/nv/m01730_a1.cu +++ /dev/null @@ -1,665 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" 
-#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x 
we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i 
+ 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = 
pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = 
wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - 
w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; 
- - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - 
w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, (pw_len * 2)); - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - 
sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01730_a3.cu b/nv/m01730_a3.cu deleted file mode 100644 
index 9ee9b33..0000000 --- a/nv/m01730_a3.cu +++ /dev/null @@ -1,598 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform 
(const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP 
(SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -__device__ static void m01730m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[ 0] |= swap_workaround (salt_buf0[0]); - w[ 1] |= swap_workaround (salt_buf0[1]); - w[ 2] |= swap_workaround (salt_buf0[2]); - w[ 3] |= swap_workaround (salt_buf0[3]); - w[ 4] |= swap_workaround (salt_buf1[0]); - w[ 5] |= swap_workaround (salt_buf1[1]); - w[ 6] |= swap_workaround (salt_buf1[2]); - w[ 7] |= swap_workaround (salt_buf1[3]); - w[ 8] |= swap_workaround (salt_buf2[0]); - w[ 9] |= swap_workaround (salt_buf2[1]); - w[10] |= swap_workaround (salt_buf2[2]); - w[11] |= swap_workaround (salt_buf2[3]); - w[12] |= swap_workaround (salt_buf3[0]); - w[13] |= swap_workaround (salt_buf3[1]); - w[14] |= swap_workaround (salt_buf3[2]); - w[15] |= swap_workaround (salt_buf3[3]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - w[15] = pw_salt_len * 8; - - /** 
- * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01730s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - 
*/ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - 
- u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - 
w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01730m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - 
m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01730_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01730s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01740_a0.cu b/nv/m01740_a0.cu deleted file mode 100644 index 7961018..0000000 --- a/nv/m01740_a0.cu +++ /dev/null @@ -1,499 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, 
SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, 
b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend 
salt - */ - - const u32 out_salt_len = (out_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = (out_len 
* 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = out_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01740_a1.cu b/nv/m01740_a1.cu deleted file mode 100644 index 6b7ac88..0000000 --- a/nv/m01740_a1.cu +++ /dev/null @@ -1,593 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; 
- u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP 
(SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = 
pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = 
wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01740_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - 
append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern 
"C" __global__ void __launch_bounds__ (256, 1) m01740_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01740_a3.cu b/nv/m01740_a3.cu deleted file mode 100644 index a7d084b..0000000 --- a/nv/m01740_a3.cu +++ /dev/null @@ -1,749 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, 
SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND 
(w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP 
(i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = g; - digest[7] = h; -} - -__device__ static void m01740m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 
pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = 
swap_workaround (w3_t[3]); - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01740s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] 
= salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = swap_workaround (w3[2]); - w3_t[3] = swap_workaround (w3[3]); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - /** - * sha512 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = 
swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - 
w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - 
w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 
pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01740_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01740s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git 
a/nv/m01750_a0.cu b/nv/m01750_a0.cu deleted file mode 100644 index 5e0f2ef..0000000 --- a/nv/m01750_a0.cu +++ /dev/null @@ -1,622 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, 
SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i 
+ 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0 ^ 0x3636363636363636; - w2_t[1] = 0 ^ 0x3636363636363636; - w2_t[2] = 
0 ^ 0x3636363636363636; - w2_t[3] = 0 ^ 0x3636363636363636; - w3_t[0] = 0 ^ 0x3636363636363636; - w3_t[1] = 0 ^ 0x3636363636363636; - w3_t[2] = 0 ^ 0x3636363636363636; - w3_t[3] = 0 ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - 
w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 
1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - 
w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = 
salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] 
= 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01750_a1.cu b/nv/m01750_a1.cu deleted file mode 100644 index ac4b85a..0000000 --- a/nv/m01750_a1.cu +++ /dev/null @@ -1,728 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, 
SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, 
g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0 ^ 0x3636363636363636; - w2_t[1] = 
0 ^ 0x3636363636363636; - w2_t[2] = 0 ^ 0x3636363636363636; - w2_t[3] = 0 ^ 0x3636363636363636; - w3_t[0] = 0 ^ 0x3636363636363636; - w3_t[1] = 0 ^ 0x3636363636363636; - w3_t[2] = 0 ^ 0x3636363636363636; - w3_t[3] = 0 ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - 
w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; 
- wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = 
wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m01750_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - 
u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01750_a3.cu b/nv/m01750_a3.cu deleted file mode 100644 index 8209bef..0000000 --- a/nv/m01750_a3.cu +++ /dev/null @@ -1,792 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, 
SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, 
w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - 
w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0x3636363636363636; - w2_t[1] = 0x3636363636363636; - w2_t[2] = 0x3636363636363636; - w2_t[3] = 0x3636363636363636; - w3_t[0] = 0x3636363636363636; - w3_t[1] = 0x3636363636363636; - w3_t[2] = 0x3636363636363636; - w3_t[3] = 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], 
u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ static void m01750m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - 
hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01750s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - 
}; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + salt_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - 
u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01750m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] 
= pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - 
w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01750_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - 
* main - */ - - m01750s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01760_a0.cu b/nv/m01760_a0.cu deleted file mode 100644 index f67085d..0000000 --- a/nv/m01760_a0.cu +++ /dev/null @@ -1,622 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, 
SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - 
#define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) 
^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0 ^ 0x3636363636363636; - w2_t[1] = 0 ^ 0x3636363636363636; - w2_t[2] = 0 ^ 0x3636363636363636; - w2_t[3] = 0 ^ 0x3636363636363636; - w3_t[0] = 0 ^ 0x3636363636363636; - w3_t[1] = 0 ^ 0x3636363636363636; - w3_t[2] = 0 ^ 0x3636363636363636; - w3_t[3] = 0 ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], 
u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = 
pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + out_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01760_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] 
= pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const 
u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + out_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01760_a1.cu b/nv/m01760_a1.cu deleted file mode 100644 index 36763c6..0000000 --- a/nv/m01760_a1.cu +++ /dev/null @@ -1,728 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, 
SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - 
} - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - w0_t[1] = hl32_to_64 (w0[2], 
w0[3]) ^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0 ^ 0x3636363636363636; - w2_t[1] = 0 ^ 0x3636363636363636; - w2_t[2] = 0 ^ 0x3636363636363636; - w2_t[3] = 0 ^ 0x3636363636363636; - w3_t[0] = 0 ^ 0x3636363636363636; - w3_t[1] = 0 ^ 0x3636363636363636; - w3_t[2] = 0 ^ 0x3636363636363636; - w3_t[3] = 0 ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x 
w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 
0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - 
w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = (128 + pw_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = 
salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - 
- u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = (128 + pw_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m01760_a3.cu b/nv/m01760_a3.cu deleted file mode 100644 index f5fe33a..0000000 --- a/nv/m01760_a3.cu +++ /dev/null @@ -1,788 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 14 -#define DGST_R1 15 -#define DGST_R2 6 -#define DGST_R3 7 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define 
VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, 
w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 
14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x3636363636363636; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x3636363636363636; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x3636363636363636; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x3636363636363636; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x3636363636363636; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x3636363636363636; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x3636363636363636; - w1_t[3] = hl32_to_64 (w3[2], w3[3]) ^ 0x3636363636363636; - w2_t[0] = 0 ^ 0x3636363636363636; - w2_t[1] = 0 ^ 0x3636363636363636; - w2_t[2] = 0 ^ 0x3636363636363636; - w2_t[3] = 0 ^ 0x3636363636363636; - w3_t[0] = 0 ^ 0x3636363636363636; - w3_t[1] = 0 ^ 0x3636363636363636; - w3_t[2] = 0 ^ 0x3636363636363636; - w3_t[3] = 0 ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, ipad); - - w0_t[0] = hl32_to_64 (w0[0], w0[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[1] = hl32_to_64 (w0[2], w0[3]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[2] = hl32_to_64 (w1[0], w1[1]) ^ 0x5c5c5c5c5c5c5c5c; - w0_t[3] = hl32_to_64 (w1[2], w1[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[0] = hl32_to_64 (w2[0], w2[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[1] = hl32_to_64 (w2[2], w2[3]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[2] = hl32_to_64 (w3[0], w3[1]) ^ 0x5c5c5c5c5c5c5c5c; - w1_t[3] = hl32_to_64 (w3[2], 
w3[3]) ^ 0x5c5c5c5c5c5c5c5c; - w2_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w2_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[0] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[1] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[2] = 0 ^ 0x5c5c5c5c5c5c5c5c; - w3_t[3] = 0 ^ 0x5c5c5c5c5c5c5c5c; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, opad); -} - -__device__ static void hmac_sha512_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - u64x w0_t[4]; - u64x w1_t[4]; - u64x w2_t[4]; - u64x w3_t[4]; - - w0_t[0] = hl32_to_64 (w0[0], w0[1]); - w0_t[1] = hl32_to_64 (w0[2], w0[3]); - w0_t[2] = hl32_to_64 (w1[0], w1[1]); - w0_t[3] = hl32_to_64 (w1[2], w1[3]); - w1_t[0] = hl32_to_64 (w2[0], w2[1]); - w1_t[1] = hl32_to_64 (w2[2], w2[3]); - w1_t[2] = hl32_to_64 (w3[0], w3[1]); - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = hl32_to_64 (w3[2], w3[3]); - - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = digest[5]; - w1_t[2] = digest[6]; - w1_t[3] = digest[7]; - w2_t[0] = 0x8000000000000000; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0_t, w1_t, w2_t, 
w3_t, digest); -} - -__device__ static void m01760m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 
0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (128 + pw_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m01760s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - 
salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (salt_buf0[0]); - w0_t[1] = swap_workaround (salt_buf0[1]); - w0_t[2] = swap_workaround (salt_buf0[2]); - w0_t[3] = swap_workaround (salt_buf0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (salt_buf1[0]); - w1_t[1] = swap_workaround (salt_buf1[1]); - w1_t[2] = swap_workaround (salt_buf1[2]); - w1_t[3] = swap_workaround (salt_buf1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = (128 + pw_len) * 8; - - u64x digest[8]; - - hmac_sha512_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - - const u32x r0 = l32_from_64 (digest[7]); - const u32x r1 = h32_from_64 (digest[7]); - const u32x r2 = l32_from_64 (digest[3]); - const u32x r3 = h32_from_64 (digest[3]); - - #include VECT_COMPARE_S - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m01760_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01760_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - 
if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m01760s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m01800.cu b/nv/m01800.cu deleted file mode 100644 index d1eb230..0000000 --- a/nv/m01800.cu +++ /dev/null @@ -1,566 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define PUTCHAR64_BE(a,p,c) ((u8 *)(a))[(p) ^ 7] = (u8) (c) -#define GETCHAR64_BE(a,p) ((u8 *)(a))[(p) ^ 7] - -typedef struct -{ - u64x state[8]; - u64x buf[16]; - int len; - -} sha512_ctx_t; - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - 
SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64x w[16], u64x digest[8]) -{ - u64x w0_t = w[ 0]; - u64x w1_t = w[ 1]; - u64x w2_t = w[ 2]; - u64x w3_t = w[ 3]; - u64x w4_t = w[ 4]; - u64x w5_t = w[ 5]; - u64x w6_t = w[ 6]; - u64x w7_t = w[ 7]; - u64x w8_t = w[ 8]; - u64x w9_t = w[ 9]; - u64x wa_t = w[10]; - u64x wb_t = w[11]; - u64x wc_t = w[12]; - u64x wd_t = w[13]; - u64x we_t = w[14]; - u64x wf_t = w[15]; - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND 
(w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void sha512_init 
(sha512_ctx_t *sha512_ctx) -{ - sha512_ctx->state[0] = SHA512M_A; - sha512_ctx->state[1] = SHA512M_B; - sha512_ctx->state[2] = SHA512M_C; - sha512_ctx->state[3] = SHA512M_D; - sha512_ctx->state[4] = SHA512M_E; - sha512_ctx->state[5] = SHA512M_F; - sha512_ctx->state[6] = SHA512M_G; - sha512_ctx->state[7] = SHA512M_H; - - sha512_ctx->len = 0; -} - -__device__ static void sha512_update (sha512_ctx_t *sha512_ctx, const u64x *buf, int len) -{ - int pos = sha512_ctx->len & 0x7f; - - sha512_ctx->len += len; - - if ((pos + len) < 128) - { - for (int i = 0; i < len; i++) - { - PUTCHAR64_BE (sha512_ctx->buf, pos++, GETCHAR64_BE (buf, i)); - } - - return; - } - - int cnt = 128 - pos; - - for (int i = 0; i < cnt; i++) - { - PUTCHAR64_BE (sha512_ctx->buf, pos++, GETCHAR64_BE (buf, i)); - } - - sha512_transform (sha512_ctx->buf, sha512_ctx->state); - - len -= cnt; - - for (int i = 0; i < len; i++) - { - PUTCHAR64_BE (sha512_ctx->buf, i, GETCHAR64_BE (buf, cnt + i)); - } -} - -__device__ static void sha512_final (sha512_ctx_t *sha512_ctx) -{ - int pos = sha512_ctx->len & 0x7f; - - for (int i = pos; i < 128; i++) - { - PUTCHAR64_BE (sha512_ctx->buf, i, 0); - } - - PUTCHAR64_BE (sha512_ctx->buf, pos, 0x80); - - if (pos >= 112) - { - sha512_transform (sha512_ctx->buf, sha512_ctx->state); - - sha512_ctx->buf[ 0] = 0; - sha512_ctx->buf[ 1] = 0; - sha512_ctx->buf[ 2] = 0; - sha512_ctx->buf[ 3] = 0; - sha512_ctx->buf[ 4] = 0; - sha512_ctx->buf[ 5] = 0; - sha512_ctx->buf[ 6] = 0; - sha512_ctx->buf[ 7] = 0; - sha512_ctx->buf[ 8] = 0; - sha512_ctx->buf[ 9] = 0; - sha512_ctx->buf[10] = 0; - sha512_ctx->buf[11] = 0; - sha512_ctx->buf[12] = 0; - sha512_ctx->buf[13] = 0; - sha512_ctx->buf[14] = 0; - sha512_ctx->buf[15] = 0; - } - - sha512_ctx->buf[15] = sha512_ctx->len * 8; - - sha512_transform (sha512_ctx->buf, sha512_ctx->state); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
sha512crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * buffers - */ - - u64x pw[2]; - - pw[0] = swap_workaround (hl32_to_64 (w0[1], w0[0])); - pw[1] = swap_workaround (hl32_to_64 (w0[3], w0[2])); - - u64x salt[2]; - - salt[0] = swap_workaround (hl32_to_64 (salt_buf[1], salt_buf[0])); - salt[1] = swap_workaround (hl32_to_64 (salt_buf[3], salt_buf[2])); - - /** - * begin - */ - - sha512_ctx_t sha512_ctx; - - sha512_init (&sha512_ctx); - - sha512_update (&sha512_ctx, pw, pw_len); - sha512_update (&sha512_ctx, salt, salt_len); - sha512_update (&sha512_ctx, pw, pw_len); - - sha512_final (&sha512_ctx); - - u64x tmp[8]; - - tmp[0] = sha512_ctx.state[0]; - tmp[1] = sha512_ctx.state[1]; - tmp[2] = sha512_ctx.state[2]; - tmp[3] = sha512_ctx.state[3]; - tmp[4] = sha512_ctx.state[4]; - tmp[5] = 
sha512_ctx.state[5]; - tmp[6] = sha512_ctx.state[6]; - tmp[7] = sha512_ctx.state[7]; - - sha512_init (&sha512_ctx); - - sha512_update (&sha512_ctx, pw, pw_len); - sha512_update (&sha512_ctx, salt, salt_len); - sha512_update (&sha512_ctx, tmp, pw_len); - - for (u32 j = pw_len; j; j >>= 1) - { - if (j & 1) - { - sha512_update (&sha512_ctx, tmp, 64); - } - else - { - sha512_update (&sha512_ctx, pw, pw_len); - } - } - - sha512_final (&sha512_ctx); - - tmps[gid].l_alt_result[0] = sha512_ctx.state[0]; - tmps[gid].l_alt_result[1] = sha512_ctx.state[1]; - tmps[gid].l_alt_result[2] = sha512_ctx.state[2]; - tmps[gid].l_alt_result[3] = sha512_ctx.state[3]; - tmps[gid].l_alt_result[4] = sha512_ctx.state[4]; - tmps[gid].l_alt_result[5] = sha512_ctx.state[5]; - tmps[gid].l_alt_result[6] = sha512_ctx.state[6]; - tmps[gid].l_alt_result[7] = sha512_ctx.state[7]; - - // p_bytes - - sha512_init (&sha512_ctx); - - for (u32 j = 0; j < pw_len; j++) - { - sha512_update (&sha512_ctx, pw, pw_len); - } - - sha512_final (&sha512_ctx); - - tmps[gid].l_p_bytes[0] = sha512_ctx.state[0]; - tmps[gid].l_p_bytes[1] = sha512_ctx.state[1]; - - // s_bytes - - sha512_init (&sha512_ctx); - - for (u32 j = 0; j < 16 + ((tmps[gid].l_alt_result[0] >> 56) & 0xff); j++) - { - sha512_update (&sha512_ctx, salt, salt_len); - } - - sha512_final (&sha512_ctx); - - tmps[gid].l_s_bytes[0] = sha512_ctx.state[0]; - tmps[gid].l_s_bytes[1] = sha512_ctx.state[1]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha512crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u64x l_p_bytes0[2]; - - l_p_bytes0[0] = tmps[gid].l_p_bytes[0]; - l_p_bytes0[1] = tmps[gid].l_p_bytes[1]; - - const u32 pw_len = pws[gid].pw_len; - - u64x l_s_bytes0[2]; - - l_s_bytes0[0] = tmps[gid].l_s_bytes[0]; - l_s_bytes0[1] = tmps[gid].l_s_bytes[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 wpc_len[8]; - - wpc_len[0] = 64 + 0 + 0 + pw_len; - wpc_len[1] = pw_len + 0 + 0 + 64; - wpc_len[2] = 64 + salt_len + 0 + pw_len; - wpc_len[3] = pw_len + salt_len + 0 + 64; - wpc_len[4] = 64 + 0 + pw_len + pw_len; - wpc_len[5] = pw_len + 0 + pw_len + 64; - wpc_len[6] = 64 + salt_len + pw_len + pw_len; - wpc_len[7] = pw_len + salt_len + pw_len + 64; - - u64 wpc[8][16] = { 0 }; - - for (u32 i = 0; i < 8; i++) - { - u32 block_len = 0; - - if (i & 1) - { - for (u32 j = 0; j < pw_len; j++) - { - PUTCHAR64_BE (wpc[i], block_len++, GETCHAR64_BE (l_p_bytes0, j)); - } - } - else - { - block_len += 64; - } - - if (i & 2) - { - for (u32 j = 0; j < salt_len; j++) - { - PUTCHAR64_BE (wpc[i], block_len++, GETCHAR64_BE (l_s_bytes0, j)); - } - } - - if (i & 4) - { - for (u32 j = 0; j < pw_len; j++) - { - PUTCHAR64_BE (wpc[i], block_len++, GETCHAR64_BE (l_p_bytes0, j)); - } - } - - if (i & 1) - { - block_len += 64; - } - else - { - for (u32 j = 0; j < pw_len; j++) - { - PUTCHAR64_BE (wpc[i], block_len++, GETCHAR64_BE (l_p_bytes0, j)); - } - } - - PUTCHAR64_BE (wpc[i], block_len, 0x80); - - wpc[i][15] = block_len * 8; - } - - /** - * base - */ - - u64x l_alt_result[8]; - - l_alt_result[0] = tmps[gid].l_alt_result[0]; - l_alt_result[1] = tmps[gid].l_alt_result[1]; - 
l_alt_result[2] = tmps[gid].l_alt_result[2]; - l_alt_result[3] = tmps[gid].l_alt_result[3]; - l_alt_result[4] = tmps[gid].l_alt_result[4]; - l_alt_result[5] = tmps[gid].l_alt_result[5]; - l_alt_result[6] = tmps[gid].l_alt_result[6]; - l_alt_result[7] = tmps[gid].l_alt_result[7]; - - - /* Repeatedly run the collected hash value through SHA512 to burn - CPU cycles. */ - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - const u32 j1 = (j & 1) ? 1 : 0; - const u32 j3 = (j % 3) ? 2 : 0; - const u32 j7 = (j % 7) ? 4 : 0; - - const u32 pc = j1 + j3 + j7; - - u64 block[16]; - - block[ 0] = wpc[pc][ 0]; - block[ 1] = wpc[pc][ 1]; - block[ 2] = wpc[pc][ 2]; - block[ 3] = wpc[pc][ 3]; - block[ 4] = wpc[pc][ 4]; - block[ 5] = wpc[pc][ 5]; - block[ 6] = wpc[pc][ 6]; - block[ 7] = wpc[pc][ 7]; - block[ 8] = wpc[pc][ 8]; - block[ 9] = wpc[pc][ 9]; - block[10] = wpc[pc][10]; - block[11] = wpc[pc][11]; - block[12] = wpc[pc][12]; - block[13] = wpc[pc][13]; - block[14] = wpc[pc][14]; - block[15] = wpc[pc][15]; - - if (j1) - { - const u32 block_len = wpc_len[pc]; - - #pragma unroll 64 - for (u32 k = 0, p = block_len - 64; k < 64; k++, p++) - { - PUTCHAR64_BE (block, p, GETCHAR64_BE (l_alt_result, k)); - } - } - else - { - block[0] = l_alt_result[0]; - block[1] = l_alt_result[1]; - block[2] = l_alt_result[2]; - block[3] = l_alt_result[3]; - block[4] = l_alt_result[4]; - block[5] = l_alt_result[5]; - block[6] = l_alt_result[6]; - block[7] = l_alt_result[7]; - } - - l_alt_result[0] = SHA512M_A; - l_alt_result[1] = SHA512M_B; - l_alt_result[2] = SHA512M_C; - l_alt_result[3] = SHA512M_D; - l_alt_result[4] = SHA512M_E; - l_alt_result[5] = SHA512M_F; - l_alt_result[6] = SHA512M_G; - l_alt_result[7] = SHA512M_H; - - sha512_transform (block, l_alt_result); - } - - tmps[gid].l_alt_result[0] = l_alt_result[0]; - tmps[gid].l_alt_result[1] = l_alt_result[1]; - tmps[gid].l_alt_result[2] = l_alt_result[2]; - tmps[gid].l_alt_result[3] = l_alt_result[3]; - tmps[gid].l_alt_result[4] = 
l_alt_result[4]; - tmps[gid].l_alt_result[5] = l_alt_result[5]; - tmps[gid].l_alt_result[6] = l_alt_result[6]; - tmps[gid].l_alt_result[7] = l_alt_result[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m01800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha512crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u64x a = swap_workaround (tmps[gid].l_alt_result[0]); - const u64x b = swap_workaround (tmps[gid].l_alt_result[1]); - - const u32x r0 = l32_from_64 (a); - const u32x r1 = h32_from_64 (a); - const u32x r2 = l32_from_64 (b); - const u32x r3 = h32_from_64 (b); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m02100.cu b/nv/m02100.cu deleted file mode 100644 index 8540592..0000000 --- a/nv/m02100.cu +++ /dev/null @@ -1,629 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DCC2_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include 
"common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - 
MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, 
w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP 
(SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 
0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02100_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, dcc2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] 
= 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - u32 salt_buf2[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[9]; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - /** - * generate dcc - */ - - append_0x80_1 (w0, pw_len); - - make_unicode (w0, w0, w1); - - w3[2] = pw_len * 2 * 8; - - u32x digest_md4[4]; - - digest_md4[0] = MD4M_A; - digest_md4[1] = MD4M_B; - digest_md4[2] = MD4M_C; - digest_md4[3] = MD4M_D; - - md4_transform (w0, w1, w2, w3, digest_md4); - - w0[0] = digest_md4[0]; - w0[1] = digest_md4[1]; - w0[2] = digest_md4[2]; - w0[3] = digest_md4[3]; - w1[0] = salt_buf0[0]; - w1[1] = salt_buf0[1]; - w1[2] = salt_buf0[2]; - w1[3] = salt_buf0[3]; - w2[0] = salt_buf1[0]; - w2[1] = salt_buf1[1]; - w2[2] = salt_buf1[2]; - w2[3] = salt_buf1[3]; - w3[0] = salt_buf2[0]; - w3[1] = salt_buf2[1]; - w3[2] = (16 + salt_len) * 8; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, 16 + salt_len); - - digest_md4[0] = MD4M_A; - digest_md4[1] = MD4M_B; - digest_md4[2] = MD4M_C; - digest_md4[3] = MD4M_D; - - md4_transform (w0, w1, w2, w3, digest_md4); - - /** - * pads - */ - - w0[0] = swap_workaround (digest_md4[0]); - w0[1] = swap_workaround (digest_md4[1]); - w0[2] = swap_workaround (digest_md4[2]); - w0[3] = swap_workaround (digest_md4[3]); - w1[0] = 0; - w1[1] = 0; - 
w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - /** - * hmac1 - */ - - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = salt_buf2[0]; - w2[1] = salt_buf2[1]; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - append_0x01_4 (w0, w1, w2, w3, salt_len + 3); - append_0x80_4 (w0, w1, w2, w3, salt_len + 4); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - - u32x digest[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest); - - tmps[gid].dgst[0] = digest[0]; - tmps[gid].dgst[1] = digest[1]; - tmps[gid].dgst[2] = digest[2]; - tmps[gid].dgst[3] = digest[3]; - tmps[gid].dgst[4] = digest[4]; - - tmps[gid].out[0] = digest[0]; - tmps[gid].out[1] = digest[1]; - tmps[gid].out[2] = digest[2]; - tmps[gid].out[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02100_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, 
const bf_t *bfs_buf, dcc2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - /** - * iter1 - */ - - u32x dgst[5]; - u32x out[4]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - - for (u32 i = 0; i < loop_cnt; i++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - } - - 
tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02100_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, dcc2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m02400_a0.cu b/nv/m02400_a0.cu deleted file mode 100644 index 838cfeb..0000000 --- a/nv/m02400_a0.cu +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include 
"types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; 
- - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - truncate_block (w0, out_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - 
MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 
0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const 
void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - truncate_block (w0, out_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - 
MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02400_a1.cu b/nv/m02400_a1.cu deleted file mode 100644 index e48a2e5..0000000 --- a/nv/m02400_a1.cu +++ /dev/null @@ -1,508 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include 
"common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == 
COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - truncate_block (w0, pw_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP 
(MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, 
MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x 
w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - truncate_block (w0, pw_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool 
q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02400_a3.cu b/nv/m02400_a3.cu deleted file mode 100644 index e46fd84..0000000 --- a/nv/m02400_a3.cu +++ /dev/null @@ -1,535 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m02400m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * algorithm specific - */ - - w[ 4] = 0x80; - w[14] = 16 * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + 
MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, 
MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, 
c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m02400s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * algorithm specific - */ - - w[ 4] = 0x80; - w[14] = 16 * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - 
const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; 
- - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - 
MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - - bool q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x 
*words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m02400m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m02400s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, 
const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02410_a0.cu b/nv/m02410_a0.cu deleted file mode 100644 index 2034d73..0000000 --- a/nv/m02410_a0.cu +++ /dev/null @@ -1,496 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - salt_buf0[3] = 0; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? 
salt_bufs[salt_pos].salt_len : 4; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - const u32 pw_salt_len = out_len + salt_len; - - truncate_block (w0, pw_salt_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP 
(MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], 
MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const 
void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - 
salt_buf0[3] = 0; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? salt_bufs[salt_pos].salt_len : 4; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - const u32 pw_salt_len = out_len + salt_len; - - truncate_block (w0, pw_salt_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - 
MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, 
w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02410_a1.cu b/nv/m02410_a1.cu deleted file mode 100644 index 3a069c9..0000000 --- a/nv/m02410_a1.cu +++ /dev/null @@ -1,606 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - salt_buf0[3] = 0; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? 
salt_bufs[salt_pos].salt_len : 4; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - truncate_block (w0, pw_salt_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x 
b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], 
MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - salt_buf0[3] = 0; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? 
salt_bufs[salt_pos].salt_len : 4; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0] | s0[0]; - w0[1] = wordl0[1] | wordr0[1] | s0[1]; - w0[2] = wordl0[2] | wordr0[2] | s0[2]; - w0[3] = wordl0[3] | wordr0[3] | s0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | 
wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - truncate_block (w0, pw_salt_len); - - w1[0] = 0x80; - w3[2] = 16 * 8; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - 
a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02410_a3.cu b/nv/m02410_a3.cu deleted file mode 100644 index cf038e6..0000000 --- a/nv/m02410_a3.cu +++ /dev/null @@ -1,625 
+0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m02410m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - salt_buf0[3] = 0; - - u32 salt_buf1[4]; - - 
salt_buf1[0] = 0; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[0] |= salt_buf0[0]; - w[1] |= salt_buf0[1]; - w[2] |= salt_buf0[2]; - w[3] |= salt_buf0[3]; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? salt_bufs[salt_pos].salt_len : 4; - - const u32 pw_salt_len = pw_len + salt_len; - - truncate_block (w, pw_salt_len); - - /** - * algorithm specific - */ - - w[ 4] = 0x80; - w[14] = 16 * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const 
u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - 
MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, 
I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m02410s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = 0; - salt_buf0[2] = 0; - salt_buf0[3] = 0; - - u32 salt_buf1[4]; - - 
salt_buf1[0] = 0; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w[0] |= salt_buf0[0]; - w[1] |= salt_buf0[1]; - w[2] |= salt_buf0[2]; - w[3] |= salt_buf0[3]; - - const u32 salt_len = (salt_bufs[salt_pos].salt_len < 4) ? salt_bufs[salt_pos].salt_len : 4; - - const u32 pw_salt_len = pw_len + salt_len; - - truncate_block (w, pw_salt_len); - - /** - * algorithm specific - */ - - w[ 4] = 0x80; - w[14] = 16 * 8; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const 
u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, 
MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H1, a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H1, a, b, c, 
d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H2, d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H1, c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H2, b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - - bool q_cond = ((a & 0x00ffffff) != search[0]); - - if (q_cond) continue; - - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a &= 0x00ffffff; - d &= 0x00ffffff; - c &= 0x00ffffff; - b &= 0x00ffffff; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const 
u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m02410m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m02410s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ -} diff --git a/nv/m02500.cu b/nv/m02500.cu deleted file mode 100644 index c74aa25..0000000 --- a/nv/m02500.cu +++ /dev/null @@ -1,920 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WPA_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP 
(MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, 
d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] 
^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, 
A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); 
SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 
1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], 
u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, wpa_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const wpa_t *wpa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = 
pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - if (j == 1) - append_0x01_3 (w0, w1, w2, salt_len + 3); - else - append_0x02_3 
(w0, w1, w2, salt_len + 3); - - append_0x80_3 (w0, w1, w2, salt_len + 4); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, wpa_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const wpa_t *wpa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = 
tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 0; i < 8; i += 5) - { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, wpa_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const wpa_t *wpa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = tmps[gid].out[3]; - w1[0] = tmps[gid].out[4]; - w1[1] = tmps[gid].out[5]; - w1[2] = tmps[gid].out[6]; - w1[3] = tmps[gid].out[7]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - w0[0] = wpa_bufs[salt_pos].pke[ 0]; - w0[1] = wpa_bufs[salt_pos].pke[ 1]; - w0[2] = wpa_bufs[salt_pos].pke[ 2]; - w0[3] = wpa_bufs[salt_pos].pke[ 3]; - w1[0] = wpa_bufs[salt_pos].pke[ 4]; - w1[1] = wpa_bufs[salt_pos].pke[ 5]; - w1[2] = wpa_bufs[salt_pos].pke[ 6]; - w1[3] = wpa_bufs[salt_pos].pke[ 7]; - w2[0] = wpa_bufs[salt_pos].pke[ 8]; - w2[1] = wpa_bufs[salt_pos].pke[ 9]; - w2[2] = wpa_bufs[salt_pos].pke[10]; - w2[3] = wpa_bufs[salt_pos].pke[11]; - w3[0] = wpa_bufs[salt_pos].pke[12]; - w3[1] = wpa_bufs[salt_pos].pke[13]; - w3[2] = wpa_bufs[salt_pos].pke[14]; - w3[3] = wpa_bufs[salt_pos].pke[15]; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = wpa_bufs[salt_pos].pke[16]; - w0[1] = wpa_bufs[salt_pos].pke[17]; - w0[2] = wpa_bufs[salt_pos].pke[18]; - w0[3] = wpa_bufs[salt_pos].pke[19]; - w1[0] = wpa_bufs[salt_pos].pke[20]; - w1[1] = wpa_bufs[salt_pos].pke[21]; - w1[2] = wpa_bufs[salt_pos].pke[22]; - w1[3] = wpa_bufs[salt_pos].pke[23]; - w2[0] = wpa_bufs[salt_pos].pke[24]; - w2[1] = 0x80000000; - w2[2] = 0; - 
w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 100) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest); - - { - w0[0] = swap_workaround (digest[0]); - w0[1] = swap_workaround (digest[1]); - w0[2] = swap_workaround (digest[2]); - w0[3] = swap_workaround (digest[3]); - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - hmac_md5_pad (w0, w1, w2, w3, ipad, opad); - - int eapol_size = wpa_bufs[salt_pos].eapol_size; - - int eapol_left; - int eapol_off; - - for (eapol_left = eapol_size, eapol_off = 0; eapol_left >= 56; eapol_left -= 64, eapol_off += 16) - { - w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0]; - w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1]; - w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2]; - w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3]; - w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4]; - w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5]; - w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6]; - w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7]; - w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8]; - w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9]; - w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10]; - w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11]; - w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12]; - w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13]; - w3[2] = wpa_bufs[salt_pos].eapol[eapol_off + 14]; - w3[3] = wpa_bufs[salt_pos].eapol[eapol_off + 15]; - - md5_transform (w0, w1, w2, w3, ipad); - } - - w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0]; - w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1]; - w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2]; - w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3]; - w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4]; - w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5]; - w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6]; - w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7]; - w2[0] = 
wpa_bufs[salt_pos].eapol[eapol_off + 8]; - w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9]; - w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10]; - w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11]; - w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12]; - w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13]; - w3[2] = (64 + eapol_size) * 8; - w3[3] = 0; - - u32x digest1[4]; - - hmac_md5_run (w0, w1, w2, w3, ipad, opad, digest1); - - /** - * base - */ - - #define il_pos 0 - - const u32x r0 = digest1[DGST_R0]; - const u32x r1 = digest1[DGST_R1]; - const u32x r2 = digest1[DGST_R2]; - const u32x r3 = digest1[DGST_R3]; - - #include VECT_COMPARE_M - } - - { - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - int eapol_size = wpa_bufs[salt_pos].eapol_size; - - int eapol_left; - int eapol_off; - - for (eapol_left = eapol_size, eapol_off = 0; eapol_left >= 56; eapol_left -= 64, eapol_off += 16) - { - w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0]; - w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1]; - w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2]; - w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3]; - w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4]; - w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5]; - w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6]; - w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7]; - w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8]; - w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9]; - w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10]; - w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11]; - w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12]; - w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13]; - w3[2] = wpa_bufs[salt_pos].eapol[eapol_off + 14]; - w3[3] = wpa_bufs[salt_pos].eapol[eapol_off + 15]; - - sha1_transform (w0, w1, w2, w3, ipad); - 
} - - w0[0] = wpa_bufs[salt_pos].eapol[eapol_off + 0]; - w0[1] = wpa_bufs[salt_pos].eapol[eapol_off + 1]; - w0[2] = wpa_bufs[salt_pos].eapol[eapol_off + 2]; - w0[3] = wpa_bufs[salt_pos].eapol[eapol_off + 3]; - w1[0] = wpa_bufs[salt_pos].eapol[eapol_off + 4]; - w1[1] = wpa_bufs[salt_pos].eapol[eapol_off + 5]; - w1[2] = wpa_bufs[salt_pos].eapol[eapol_off + 6]; - w1[3] = wpa_bufs[salt_pos].eapol[eapol_off + 7]; - w2[0] = wpa_bufs[salt_pos].eapol[eapol_off + 8]; - w2[1] = wpa_bufs[salt_pos].eapol[eapol_off + 9]; - w2[2] = wpa_bufs[salt_pos].eapol[eapol_off + 10]; - w2[3] = wpa_bufs[salt_pos].eapol[eapol_off + 11]; - w3[0] = wpa_bufs[salt_pos].eapol[eapol_off + 12]; - w3[1] = wpa_bufs[salt_pos].eapol[eapol_off + 13]; - w3[2] = 0; - w3[3] = (64 + eapol_size) * 8; - - u32x digest2[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, digest2); - - /** - * base - */ - - #define il_pos 0 - - const u32x r0 = digest2[DGST_R0]; - const u32x r1 = digest2[DGST_R1]; - const u32x r2 = digest2[DGST_R2]; - const u32x r3 = digest2[DGST_R3]; - - #include VECT_COMPARE_M - } -} diff --git a/nv/m02610_a0.cu b/nv/m02610_a0.cu deleted file mode 100644 index 0bf029d..0000000 --- a/nv/m02610_a0.cu +++ /dev/null @@ -1,657 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, 
MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, 
d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, 
MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, 
MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = 
MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, 
d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 
0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, 
MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02610_a1.cu b/nv/m02610_a1.cu deleted file mode 100644 index c13d3c7..0000000 --- a/nv/m02610_a1.cu +++ /dev/null @@ -1,759 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 
pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, 
b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); 
- MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const 
u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, 
c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; 
il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, 
b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - 
MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - 
const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, 
c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, 
const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02610_a3.cu b/nv/m02610_a3.cu deleted file mode 100644 index 43932d0..0000000 --- a/nv/m02610_a3.cu +++ /dev/null @@ -1,861 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include 
"include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void m02610m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP 
(MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, 
b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, 
w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m02610s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 
search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, 
c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - 
MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, 
a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, 
MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02610_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02610s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m02710_a0.cu b/nv/m02710_a0.cu deleted file mode 100644 index 2a33a0a..0000000 --- a/nv/m02710_a0.cu +++ /dev/null @@ -1,830 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" 
-#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - 
pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, 
w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | 
uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, 
MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b 
+ MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, 
MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 
lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP 
(MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, 
MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = 
uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); 
- MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP 
(MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, 
d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const 
comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02710_a1.cu b/nv/m02710_a1.cu deleted file mode 100644 index 2012560..0000000 --- a/nv/m02710_a1.cu +++ /dev/null @@ -1,932 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define 
DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - 
wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP 
(MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP 
(MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, 
d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - 
MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - 
MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt 
- */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - 
- u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, 
b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; 
- - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, 
MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, 
b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, 
MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02710_a3.cu b/nv/m02710_a3.cu deleted file mode 100644 index 6c33c2a..0000000 --- 
a/nv/m02710_a3.cu +++ /dev/null @@ -1,1034 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void m02710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); 
- MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, 
MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - 
MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - 
MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I 
, d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m02710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = salt_bufs[salt_pos].salt_buf[6]; - s[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 r_14 = (32 + salt_len) * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - 
MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); 
- MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP 
(MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, 
MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP0(MD5_I , a, b, c, d, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); 
- MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const 
u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - 
- const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - 
w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - 
w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x 
w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; 
- w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m02810_a0.cu b/nv/m02810_a0.cu deleted file mode 100644 index 8b15c33..0000000 --- a/nv/m02810_a0.cu +++ /dev/null @@ -1,832 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 
-#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = 
salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - const u32 r_14 = 64 * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, 
d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); 
- MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; - const u32 w3_t = s[3]; - const u32 w4_t = s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, 
b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, 
wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, 
MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, 
c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = 
salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - const u32 r_14 = 64 * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], 
MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H 
, c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; - const u32 w3_t = s[3]; - const u32 w4_t = s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d 
>> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, 
MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - 
MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - 
MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, 
const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02810_a1.cu b/nv/m02810_a1.cu deleted file mode 100644 index fe4767e..0000000 --- a/nv/m02810_a1.cu +++ /dev/null @@ -1,930 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -#ifdef VECT_SIZE1 
-#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - 
{ - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - const u32 r_14 = 64 * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = 
wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], 
MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; - const u32 w3_t = s[3]; - const u32 w4_t = 
s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - 
MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x 
r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - const u32 r_14 = 64 * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], 
MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H 
, b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; - const u32 w3_t = s[3]; - const u32 w4_t = s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = 
uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - 
MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m02810_a3.cu b/nv/m02810_a3.cu deleted file mode 100644 index 4f84ce6..0000000 --- a/nv/m02810_a3.cu +++ /dev/null @@ -1,1032 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void m02810m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = 
salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - const u32 r_14 = 64 * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, 
w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP 
(MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; - const u32 w3_t = s[3]; - const u32 w4_t = s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, 
MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, 
MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, 
MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m02810s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - s[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - s[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - s[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - s[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - s[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - s[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - s[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - const u32 r_00 = 0x80; - 
const u32 r_14 = 64 * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32 w0_t = s[0]; - const u32 w1_t = s[1]; - const u32 w2_t = s[2]; 
- const u32 w3_t = s[3]; - const u32 w4_t = s[4]; - const u32 w5_t = s[5]; - const u32 w6_t = s[6]; - const u32 w7_t = s[7]; - - const u32x w8_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w9_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x wa_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x wb_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x wc_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x wd_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x we_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x wf_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - 
MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I 
, d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r_a = a + MD5M_A; - const u32x r_b = b + MD5M_B; - const u32x r_c = c + MD5M_C; - const u32x r_d = d + MD5M_D; - - a = r_a; - b = r_b; - c = r_c; - d = r_d; - - MD5_STEP (MD5_Fo, a, b, c, d, r_00, MD5C00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, MD5C0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, MD5C0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, r_14, MD5C0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, MD5C0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, MD5C10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, r_00, MD5C13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C18, 
MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, r_14, MD5C19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, MD5C1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, MD5C1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, MD5C1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, MD5C1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, MD5C20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, r_14, MD5C23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, r_00, MD5C29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, MD5C2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, MD5C2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, MD5C2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, r_00, MD5C30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, r_14, MD5C32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, MD5C39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP0(MD5_I , d, a, b, c, MD5C3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, MD5C3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, MD5C3f, MD5S33); - 
- a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m02810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m02810s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m03000_a0.cu b/nv/m03000_a0.cu deleted file mode 100644 index 53fb28d..0000000 --- a/nv/m03000_a0.cu +++ /dev/null @@ -1,796 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 
0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 
0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 
0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 
0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 
0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 
0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 
0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#define LM_IV_0_IP_RR3 0x2400b807 -#define LM_IV_1_IP_RR3 0xaa190747 - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - asm (".reg .u8 c0, c1, c2, c3, c4, c5, c6, c7;"); - - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - for (int i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - u = (u >> 2) & 0x3f3f3f3f; - t = (t >> 2) & 0x3f3f3f3f; - - u32 u0, u1, u2, u3; - u32 t0, t1, t2, t3; - - asm - ( - "mov.b32 {c0, c1, c2, c3}, %8;" - "mov.b32 {c4, c5, c6, c7}, %9;" - "cvt.u32.u8 %0, c0;" - "cvt.u32.u8 %1, 
c1;" - "cvt.u32.u8 %2, c2;" - "cvt.u32.u8 %3, c3;" - "cvt.u32.u8 %4, c4;" - "cvt.u32.u8 %5, c5;" - "cvt.u32.u8 %6, c6;" - "cvt.u32.u8 %7, c7;" - - : "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3), - "=r"(t0), "=r"(t1), "=r"(t2), "=r"(t3) - : "r"(u), - "r"(t) - ); - - l ^= BOX (u0, 0, s_SPtrans) - | BOX (u1, 2, s_SPtrans) - | BOX (u2, 4, s_SPtrans) - | BOX (u3, 6, s_SPtrans) - | BOX (t0, 1, s_SPtrans) - | BOX (t1, 3, s_SPtrans) - | BOX (t2, 5, s_SPtrans) - | BOX (t3, 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - iv[0] = rotl32 (l, 29); - iv[1] = rotl32 (r, 29); -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - for (u32 i = 0; i < 16; i++) - { - if ((i < 2) || (i == 8) || (i == 15)) - { - c = ((c >> 1) | (c << 27)); - d = ((d >> 1) | (d << 27)); - } - else - { - c = ((c >> 2) | (c << 26)); - d = ((d >> 2) | (d << 26)); - } - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; - - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); - - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; - - u32x t = BOX (((d00 >> 0) 
& 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) -{ - u32x t[8]; - - t[0] = (w0 >> 0) & 0xff; - t[1] = (w0 >> 8) & 0xff; - t[2] = (w0 >> 16) & 0xff; - t[3] = (w0 >> 24) & 0xff; - t[4] = (w1 >> 0) & 0xff; - t[5] = (w1 >> 8) & 0xff; - t[6] = (w1 >> 16) & 0xff; - t[7] = (w1 >> 24) & 0xff; - - u32x k[8]; - - k[0] = (t[0] >> 0); - k[1] = (t[0] << 7) | (t[1] >> 1); - k[2] = (t[1] << 6) | (t[2] >> 2); - k[3] = (t[2] << 5) | (t[3] >> 3); - k[4] = (t[3] << 4) | (t[4] >> 4); - k[5] = (t[4] << 3) | (t[5] >> 5); - k[6] = (t[5] << 2) | (t[6] >> 6); - k[7] = (t[6] << 1); - - out[0] = ((k[0] & 0xff) << 0) - | ((k[1] & 0xff) << 8) - | ((k[2] & 0xff) << 16) - | ((k[3] & 0xff) << 24); - - out[1] = ((k[4] & 0xff) << 0) - | ((k[5] & 0xff) << 8) - | ((k[6] & 0xff) << 16) - | ((k[7] & 0xff) << 24); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf[4]; - - pw_buf[0] = pws[gid].i[ 0]; - pw_buf[1] = pws[gid].i[ 1]; - pw_buf[2] = 0; - pw_buf[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf[0]; - w0[1] = pw_buf[1]; - w0[2] = pw_buf[2]; - w0[3] = pw_buf[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 7) ? 
7 : out_len; - - u32x key[2]; - - transform_netntlmv1_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = LM_IV_0_IP_RR3; - data[1] = LM_IV_1_IP_RR3; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf[4]; - - pw_buf[0] = pws[gid].i[ 0]; - pw_buf[1] = pws[gid].i[ 1]; - pw_buf[2] = 0; - pw_buf[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - 
__syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf[0]; - w0[1] = pw_buf[1]; - w0[2] = pw_buf[2]; - w0[3] = pw_buf[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 7) ? 7 : out_len; - - u32x key[2]; - - transform_netntlmv1_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = LM_IV_0_IP_RR3; - data[1] = LM_IV_1_IP_RR3; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03000_a1.cu b/nv/m03000_a1.cu deleted file mode 100644 index 0e36da9..0000000 --- a/nv/m03000_a1.cu +++ /dev/null @@ -1,918 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; 
\ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 
0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 
0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - 
/* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 
0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 
0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 
0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#define LM_IV_0_IP_RR3 0x2400b807 -#define LM_IV_1_IP_RR3 0xaa190747 - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - 
-__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - asm (".reg .u8 c0, c1, c2, c3, c4, c5, c6, c7;"); - - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - for (int i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - u = (u >> 2) & 0x3f3f3f3f; - t = (t >> 2) & 0x3f3f3f3f; - - u32 u0, u1, u2, u3; - u32 t0, t1, t2, t3; - - asm - ( - "mov.b32 {c0, c1, c2, c3}, %8;" - "mov.b32 {c4, c5, c6, c7}, %9;" - "cvt.u32.u8 %0, c0;" - "cvt.u32.u8 %1, c1;" - "cvt.u32.u8 %2, c2;" - "cvt.u32.u8 %3, c3;" - "cvt.u32.u8 %4, c4;" - "cvt.u32.u8 %5, c5;" - "cvt.u32.u8 %6, c6;" - "cvt.u32.u8 %7, c7;" - - : "=r"(u0), "=r"(u1), "=r"(u2), "=r"(u3), - "=r"(t0), "=r"(t1), "=r"(t2), "=r"(t3) - : "r"(u), - "r"(t) - ); - - l ^= BOX (u0, 0, s_SPtrans) - | BOX (u1, 2, s_SPtrans) - | BOX (u2, 4, s_SPtrans) - | BOX (u3, 6, s_SPtrans) - | BOX (t0, 1, s_SPtrans) - | BOX (t1, 3, s_SPtrans) - | BOX (t2, 5, s_SPtrans) - | BOX (t3, 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - iv[0] = rotl32 (l, 29); - iv[1] = rotl32 (r, 29); -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - for (u32 i = 0; i < 16; i++) - { - if ((i < 2) || (i == 8) || (i == 15)) - { - c = ((c >> 1) | (c << 27)); - d = ((d >> 1) | (d << 27)); - } - else - { - c = ((c >> 2) | (c << 26)); - d = ((d >> 2) | (d << 26)); - } - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - 
const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; - - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 0xff) - |((c06 >> 16) & 0xff), 3, s_skb); - - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; - - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) -{ - u32x t[8]; - - t[0] = (w0 >> 0) & 0xff; - t[1] = (w0 >> 8) & 0xff; - t[2] = (w0 >> 16) & 0xff; - t[3] = (w0 >> 24) & 0xff; - t[4] = (w1 >> 0) & 0xff; - t[5] = (w1 >> 8) & 0xff; - t[6] = (w1 >> 16) & 0xff; - t[7] = (w1 >> 24) & 0xff; - - u32x k[8]; - - k[0] = (t[0] >> 0); - k[1] = (t[0] << 7) | (t[1] >> 1); - k[2] = (t[1] << 6) | (t[2] >> 2); - k[3] = (t[2] << 5) | (t[3] >> 3); - k[4] = (t[3] << 4) | (t[4] >> 4); - k[5] = (t[4] << 3) | (t[5] >> 5); - k[6] = (t[5] << 2) | (t[6] >> 6); - k[7] = (t[6] << 1); - - out[0] = ((k[0] & 0xff) << 0) - | ((k[1] & 0xff) << 8) - | ((k[2] & 0xff) << 16) - | ((k[3] & 0xff) << 24); - - out[1] = ((k[4] & 0xff) << 0) - | ((k[5] & 0xff) << 8) - | ((k[6] & 0xff) << 16) - | ((k[7] & 0xff) << 24); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = 
c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 7) ? 7 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x key[2]; - - transform_netntlmv1_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = LM_IV_0_IP_RR3; - data[1] = LM_IV_1_IP_RR3; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - 
- for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 7) ? 7 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x key[2]; - - transform_netntlmv1_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = LM_IV_0_IP_RR3; - data[1] = LM_IV_1_IP_RR3; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const 
salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03000_a3.cu b/nv/m03000_a3.cu deleted file mode 100644 index 326ae26..0000000 --- a/nv/m03000_a3.cu +++ /dev/null @@ -1,1994 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below - * : sboxes for others were takes fron JtR, license below - */ - -#define _DES_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp_bs.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4_warp_bs.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp_bs.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp_bs.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp_bs.c" -#endif - -#define KXX_DECL - -__device__ __constant__ u32x c_bfs[1024]; -__device__ __constant__ bs_word_t c_tm[32]; -__device__ __shared__ u32 s_S[64]; - -#if __CUDA_ARCH__ >= 500 - -// -// Bitslice DES S-boxes with LOP3.LUT instructions -// For NVIDIA Maxwell architecture and CUDA 7.5 RC -// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19 -// -// Gate counts: 25 24 25 18 25 24 24 23 -// Average: 23.5 -// Depth: 8 7 7 6 8 10 10 8 -// Average: 8 -// -// Note that same S-box function with a lower gate count isn't necessarily faster. -// -// These Boolean expressions corresponding to DES S-boxes were -// discovered by -// -// This file itself is Copyright (c) 2015 by -// Redistribution and use in source and binary forms, with or without -// modification, are permitted. -// -// The underlying mathematical formulas are NOT copyrighted. 
-// - -#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d)); - -__device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xAA55AA5500550055, a1, a4, a6, 0xC1) - LUT(xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E) - LUT(x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6) - LUT(xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56) - LUT(x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C) - LUT(x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B) - LUT(xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6) - LUT(x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A) - LUT(x55EE55EE55EE55EE, a1, a2, a4, 0x7A) - LUT(x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9) - LUT(x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72) - LUT(xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29) - LUT(x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95) - LUT(x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6) - LUT(xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2) - LUT(x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90) - LUT(x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76) - LUT(x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80) - LUT(x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6) - LUT(x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6) - LUT(x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21) - LUT(x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A) - LUT(x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70) - LUT(xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97) - LUT(x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - 
*out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xEEEEEEEE99999999, a1, a2, a6, 0x97) - LUT(xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67) - LUT(x5555FFFFFFFF0000, a1, a5, a6, 0x76) - LUT(x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69) - LUT(x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A) - LUT(xCFCF3030CFCF3030, a2, a3, a5, 0x65) - LUT(xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D) - LUT(xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA) - LUT(x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6) - LUT(x3333CCCC00000000, a2, a5, a6, 0x14) - LUT(xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5) - LUT(x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C) - LUT(x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96) - LUT(x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A) - LUT(x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE) - LUT(x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90) - LUT(x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79) - LUT(x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29) - LUT(x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6) - LUT(xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A) - LUT(xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF) - LUT(xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D) - LUT(xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B) - LUT(x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xA50FA50FA50FA50F, a1, a3, a4, 0xC9) - LUT(xF0F00F0FF0F0F0F0, 
a3, a5, a6, 0x4B) - LUT(xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D) - LUT(x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69) - LUT(xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6) - LUT(x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C) - LUT(x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6) - LUT(xAA55AA5500AA00AA, a1, a4, a6, 0x49) - LUT(xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B) - LUT(x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66) - LUT(xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F) - LUT(xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB) - LUT(x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C) - LUT(x500F500F500F500F, a1, a3, a4, 0x98) - LUT(xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D) - LUT(xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A) - LUT(xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2) - LUT(xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D) - LUT(x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6) - LUT(x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6) - LUT(x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6) - LUT(x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB) - LUT(xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9) - LUT(x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B) - LUT(x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x55F055F055F055F0, a1, a3, a4, 0x72) - LUT(xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD) - LUT(xF50AF50AF50AF50A, a1, a3, a4, 0x59) - LUT(xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7) - LUT(x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 
0xC6) - LUT(x9999666699996666, a1, a2, a5, 0x69) - LUT(x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18) - LUT(xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63) - LUT(x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A) - LUT(x4848484848484848, a1, a2, a3, 0x12) - LUT(x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28) - LUT(x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E) - LUT(x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14) - LUT(x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78) - LUT(x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6) - LUT(x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A) - LUT(x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9) - LUT(x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB) - LUT(xFFFF00005555FFFF, a1, a5, a6, 0xB9) - LUT(xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8) - LUT(x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34) - LUT(xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE) - LUT(x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29) - LUT(x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6) - LUT(xA5A50A0AA5A50A0A, a1, a3, a5, 0x49) - LUT(x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96) - LUT(x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA) - LUT(xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E) - LUT(x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96) - LUT(x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA) - LUT(x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5) - LUT(x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 
0x97) - LUT(x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47) - LUT(xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B) - LUT(x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9) - LUT(x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA) - LUT(x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1) - LUT(x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47) - LUT(x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E) - LUT(xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94) - LUT(x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9) - LUT(x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x5050F5F55050F5F5, a1, a3, a5, 0xB2) - LUT(x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66) - LUT(xAAAA5555AAAA5555, a1, a1, a5, 0xA9) - LUT(x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9) - LUT(x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6) - LUT(xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD) - LUT(x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4) - LUT(x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C) - LUT(x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20) - LUT(xF00F0FF0F00F0FF0, a3, a4, a5, 0x69) - LUT(x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E) - LUT(x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49) - LUT(xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93) - LUT(x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C) - LUT(x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4) - LUT(x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76) - 
LUT(xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD) - LUT(x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86) - LUT(x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6) - LUT(x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D) - LUT(x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26) - LUT(xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B) - LUT(x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2) - LUT(x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(x88AA88AA88AA88AA, a1, a2, a4, 0x0B) - LUT(xAAAAFF00AAAAFF00, a1, a4, a5, 0x27) - LUT(xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E) - LUT(x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6) - LUT(x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B) - LUT(x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9) - LUT(x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A) - LUT(x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63) - LUT(xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7) - LUT(x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93) - LUT(x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D) - LUT(x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E) - LUT(x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6) - LUT(xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D) - LUT(x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6) - LUT(x8888CCCC8888CCCC, a1, a2, a5, 0x23) - LUT(x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72) - LUT(x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC) - LUT(xA050A050A050A050, a1, a3, a4, 0x21) - LUT(xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, 
x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4) - LUT(xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96) - LUT(xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E) - LUT(x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B) - LUT(x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -__device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - LUT(xEEEE3333EEEE3333, a1, a2, a5, 0x9D) - LUT(xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83) - LUT(xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B) - LUT(x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85) - LUT(xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6) - LUT(x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9) - LUT(x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E) - LUT(x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61) - LUT(x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C) - LUT(xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF) - LUT(x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4) - LUT(x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69) - LUT(xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F) - LUT(xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9) - LUT(x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6) - LUT(xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C) - LUT(x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71) - LUT(x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9) - LUT(x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69) - LUT(x3, a6, x3055574530555745, x693CD926693CD926, 0x6A) - LUT(x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2) - LUT(x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, 
x9955EE559955EE55, 0x9C) - LUT(x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39) - - *out1 ^= x1; - *out2 ^= x2; - *out3 ^= x3; - *out4 ^= x4; -} - -#else - -/* - * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC - * architectures. These use AND, OR, XOR, NOT, and AND-NOT gates. - * - * Gate counts: 49 44 46 33 48 46 46 41 - * Average: 44.125 - * - * Several same-gate-count expressions for each S-box are included (for use on - * different CPUs/GPUs). - * - * These Boolean expressions corresponding to DES S-boxes have been generated - * by Roman Rusakov for use in Openwall's - * John the Ripper password cracker: http://www.openwall.com/john/ - * Being mathematical formulas, they are not copyrighted and are free for reuse - * by anyone. - * - * This file (a specific representation of the S-box expressions, surrounding - * logic) is Copyright (c) 2011 by Solar Designer . - * Redistribution and use in source and binary forms, with or without - * modification, are permitted. (This is a heavily cut-down "BSD license".) 
- * - * The effort has been sponsored by Rapid7: http://www.rapid7.com - */ - -__device__ static void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x55005500, x5A0F5A0F, x3333FFFF, x66666666, x22226666, x2D2D6969, - x25202160; - u32 x00FFFF00, x33CCCC33, x4803120C, x2222FFFF, x6A21EDF3, x4A01CC93; - u32 x5555FFFF, x7F75FFFF, x00D20096, x7FA7FF69; - u32 x0A0A0000, x0AD80096, x00999900, x0AD99996; - u32 x22332233, x257AA5F0, x054885C0, xFAB77A3F, x2221EDF3, xD89697CC; - u32 x05B77AC0, x05F77AD6, x36C48529, x6391D07C, xBB0747B0; - u32 x4C460000, x4EDF9996, x2D4E49EA, xBBFFFFB0, x96B1B65A; - u32 x5AFF5AFF, x52B11215, x4201C010, x10B0D205; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x55005500 = a1 & ~a5; - x5A0F5A0F = a4 ^ x55005500; - x3333FFFF = a3 | a6; - x66666666 = a1 ^ a3; - x22226666 = x3333FFFF & x66666666; - x2D2D6969 = a4 ^ x22226666; - x25202160 = x2D2D6969 & ~x5A0F5A0F; - - x00FFFF00 = a5 ^ a6; - x33CCCC33 = a3 ^ x00FFFF00; - x4803120C = x5A0F5A0F & ~x33CCCC33; - x2222FFFF = a6 | x22226666; - x6A21EDF3 = x4803120C ^ x2222FFFF; - x4A01CC93 = x6A21EDF3 & ~x25202160; - - x5555FFFF = a1 | a6; - x7F75FFFF = x6A21EDF3 | x5555FFFF; - x00D20096 = a5 & ~x2D2D6969; - x7FA7FF69 = x7F75FFFF ^ x00D20096; - - x0A0A0000 = a4 & ~x5555FFFF; - x0AD80096 = x00D20096 ^ x0A0A0000; - x00999900 = x00FFFF00 & ~x66666666; - x0AD99996 = x0AD80096 | x00999900; - - x22332233 = a3 & ~x55005500; - x257AA5F0 = x5A0F5A0F ^ x7F75FFFF; - x054885C0 = x257AA5F0 & ~x22332233; - xFAB77A3F = ~x054885C0; - x2221EDF3 = x3333FFFF & x6A21EDF3; - xD89697CC = xFAB77A3F ^ x2221EDF3; - x20 = x7FA7FF69 & ~a2; - x21 = x20 ^ xD89697CC; - *out3 ^= x21; - - x05B77AC0 = x00FFFF00 ^ x054885C0; - x05F77AD6 = x00D20096 | x05B77AC0; - x36C48529 = x3333FFFF ^ x05F77AD6; - x6391D07C = a1 ^ x36C48529; - xBB0747B0 = xD89697CC ^ x6391D07C; - x00 = x25202160 | a2; - x01 = x00 ^ xBB0747B0; - *out1 ^= x01; - - x4C460000 = 
x3333FFFF ^ x7F75FFFF; - x4EDF9996 = x0AD99996 | x4C460000; - x2D4E49EA = x6391D07C ^ x4EDF9996; - xBBFFFFB0 = x00FFFF00 | xBB0747B0; - x96B1B65A = x2D4E49EA ^ xBBFFFFB0; - x10 = x4A01CC93 | a2; - x11 = x10 ^ x96B1B65A; - *out2 ^= x11; - - x5AFF5AFF = a5 | x5A0F5A0F; - x52B11215 = x5AFF5AFF & ~x2D4E49EA; - x4201C010 = x4A01CC93 & x6391D07C; - x10B0D205 = x52B11215 ^ x4201C010; - x30 = x10B0D205 | a2; - x31 = x30 ^ x0AD99996; - *out4 ^= x31; -} - -__device__ static void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x33CC33CC; - u32 x55550000, x00AA00FF, x33BB33FF; - u32 x33CC0000, x11441144, x11BB11BB, x003311BB; - u32 x00000F0F, x336600FF, x332200FF, x332200F0; - u32 x0302000F, xAAAAAAAA, xA9A8AAA5, x33CCCC33, x33CCC030, x9A646A95; - u32 x00333303, x118822B8, xA8208805, x3CC3C33C, x94E34B39; - u32 x0331330C, x3FF3F33C, xA9DF596A, xA9DF5F6F, x962CAC53; - u32 xA9466A6A, x3DA52153, x29850143, x33C0330C, x1A45324F; - u32 x0A451047, xBBDFDD7B, xB19ACD3C; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x33CC33CC = a2 ^ a5; - - x55550000 = a1 & ~a6; - x00AA00FF = a5 & ~x55550000; - x33BB33FF = a2 | x00AA00FF; - - x33CC0000 = x33CC33CC & ~a6; - x11441144 = a1 & x33CC33CC; - x11BB11BB = a5 ^ x11441144; - x003311BB = x11BB11BB & ~x33CC0000; - - x00000F0F = a3 & a6; - x336600FF = x00AA00FF ^ x33CC0000; - x332200FF = x33BB33FF & x336600FF; - x332200F0 = x332200FF & ~x00000F0F; - - x0302000F = a3 & x332200FF; - xAAAAAAAA = ~a1; - xA9A8AAA5 = x0302000F ^ xAAAAAAAA; - x33CCCC33 = a6 ^ x33CC33CC; - x33CCC030 = x33CCCC33 & ~x00000F0F; - x9A646A95 = xA9A8AAA5 ^ x33CCC030; - x10 = a4 & ~x332200F0; - x11 = x10 ^ x9A646A95; - *out2 ^= x11; - - x00333303 = a2 & ~x33CCC030; - x118822B8 = x11BB11BB ^ x00333303; - xA8208805 = xA9A8AAA5 & ~x118822B8; - x3CC3C33C = a3 ^ x33CCCC33; - x94E34B39 = xA8208805 ^ x3CC3C33C; - x00 = x33BB33FF & ~a4; - x01 = x00 ^ x94E34B39; - *out1 ^= x01; - - x0331330C 
= x0302000F ^ x00333303; - x3FF3F33C = x3CC3C33C | x0331330C; - xA9DF596A = x33BB33FF ^ x9A646A95; - xA9DF5F6F = x00000F0F | xA9DF596A; - x962CAC53 = x3FF3F33C ^ xA9DF5F6F; - - xA9466A6A = x332200FF ^ x9A646A95; - x3DA52153 = x94E34B39 ^ xA9466A6A; - x29850143 = xA9DF5F6F & x3DA52153; - x33C0330C = x33CC33CC & x3FF3F33C; - x1A45324F = x29850143 ^ x33C0330C; - x20 = x1A45324F | a4; - x21 = x20 ^ x962CAC53; - *out3 ^= x21; - - x0A451047 = x1A45324F & ~x118822B8; - xBBDFDD7B = x33CCCC33 | xA9DF596A; - xB19ACD3C = x0A451047 ^ xBBDFDD7B; - x30 = x003311BB | a4; - x31 = x30 ^ xB19ACD3C; - *out4 ^= x31; -} - -__device__ static void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x44444444, x0F0FF0F0, x4F4FF4F4, x00FFFF00, x00AAAA00, x4FE55EF4; - u32 x3C3CC3C3, x3C3C0000, x7373F4F4, x0C840A00; - u32 x00005EF4, x00FF5EFF, x00555455, x3C699796; - u32 x000FF000, x55AA55AA, x26D9A15E, x2FDFAF5F, x2FD00F5F; - u32 x55AAFFAA, x28410014, x000000FF, x000000CC, x284100D8; - u32 x204100D0, x3C3CC3FF, x1C3CC32F, x4969967A; - u32 x4CC44CC4, x40C040C0, xC3C33C3C, x9669C396, xD6A98356; - u32 xD6E9C3D6, x4CEEEEC4, x9A072D12, x001A000B, x9A1F2D1B; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x44444444 = a1 & ~a2; - x0F0FF0F0 = a3 ^ a6; - x4F4FF4F4 = x44444444 | x0F0FF0F0; - x00FFFF00 = a4 ^ a6; - x00AAAA00 = x00FFFF00 & ~a1; - x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00; - - x3C3CC3C3 = a2 ^ x0F0FF0F0; - x3C3C0000 = x3C3CC3C3 & ~a6; - x7373F4F4 = x4F4FF4F4 ^ x3C3C0000; - x0C840A00 = x4FE55EF4 & ~x7373F4F4; - - x00005EF4 = a6 & x4FE55EF4; - x00FF5EFF = a4 | x00005EF4; - x00555455 = a1 & x00FF5EFF; - x3C699796 = x3C3CC3C3 ^ x00555455; - x30 = x4FE55EF4 & ~a5; - x31 = x30 ^ x3C699796; - *out4 ^= x31; - - x000FF000 = x0F0FF0F0 & x00FFFF00; - x55AA55AA = a1 ^ a4; - x26D9A15E = x7373F4F4 ^ x55AA55AA; - x2FDFAF5F = a3 | x26D9A15E; - x2FD00F5F = x2FDFAF5F & ~x000FF000; - - x55AAFFAA = x00AAAA00 | x55AA55AA; - 
x28410014 = x3C699796 & ~x55AAFFAA; - x000000FF = a4 & a6; - x000000CC = x000000FF & ~a2; - x284100D8 = x28410014 ^ x000000CC; - - x204100D0 = x7373F4F4 & x284100D8; - x3C3CC3FF = x3C3CC3C3 | x000000FF; - x1C3CC32F = x3C3CC3FF & ~x204100D0; - x4969967A = a1 ^ x1C3CC32F; - x10 = x2FD00F5F & a5; - x11 = x10 ^ x4969967A; - *out2 ^= x11; - - x4CC44CC4 = x4FE55EF4 & ~a2; - x40C040C0 = x4CC44CC4 & ~a3; - xC3C33C3C = ~x3C3CC3C3; - x9669C396 = x55AAFFAA ^ xC3C33C3C; - xD6A98356 = x40C040C0 ^ x9669C396; - x00 = a5 & ~x0C840A00; - x01 = x00 ^ xD6A98356; - *out1 ^= x01; - - xD6E9C3D6 = x40C040C0 | x9669C396; - x4CEEEEC4 = x00AAAA00 | x4CC44CC4; - x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4; - x001A000B = a4 & ~x4FE55EF4; - x9A1F2D1B = x9A072D12 | x001A000B; - x20 = a5 & ~x284100D8; - x21 = x20 ^ x9A1F2D1B; - *out3 ^= x21; -} - -__device__ static void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x5A5A5A5A, x0F0FF0F0; - u32 x33FF33FF, x33FFCC00, x0C0030F0, x0C0CC0C0, x0CF3C03F, x5EFBDA7F, - x52FBCA0F, x61C8F93C; - u32 x00C0C03C, x0F0F30C0, x3B92A366, x30908326, x3C90B3D6; - u32 x33CC33CC, x0C0CFFFF, x379E5C99, x04124C11, x56E9861E, xA91679E1; - u32 x9586CA37, x8402C833, x84C2C83F, xB35C94A6; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x5A5A5A5A = a1 ^ a3; - x0F0FF0F0 = a3 ^ a5; - x33FF33FF = a2 | a4; - x33FFCC00 = a5 ^ x33FF33FF; - x0C0030F0 = x0F0FF0F0 & ~x33FFCC00; - x0C0CC0C0 = x0F0FF0F0 & ~a2; - x0CF3C03F = a4 ^ x0C0CC0C0; - x5EFBDA7F = x5A5A5A5A | x0CF3C03F; - x52FBCA0F = x5EFBDA7F & ~x0C0030F0; - x61C8F93C = a2 ^ x52FBCA0F; - - x00C0C03C = x0CF3C03F & x61C8F93C; - x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C; - x3B92A366 = x5A5A5A5A ^ x61C8F93C; - x30908326 = x3B92A366 & ~x0F0F30C0; - x3C90B3D6 = x0C0030F0 ^ x30908326; - - x33CC33CC = a2 ^ a4; - x0C0CFFFF = a5 | x0C0CC0C0; - x379E5C99 = x3B92A366 ^ x0C0CFFFF; - x04124C11 = x379E5C99 & ~x33CC33CC; - x56E9861E = x52FBCA0F ^ x04124C11; - x00 = 
a6 & ~x3C90B3D6; - x01 = x00 ^ x56E9861E; - *out1 ^= x01; - - xA91679E1 = ~x56E9861E; - x10 = x3C90B3D6 & ~a6; - x11 = x10 ^ xA91679E1; - *out2 ^= x11; - - x9586CA37 = x3C90B3D6 ^ xA91679E1; - x8402C833 = x9586CA37 & ~x33CC33CC; - x84C2C83F = x00C0C03C | x8402C833; - xB35C94A6 = x379E5C99 ^ x84C2C83F; - x20 = x61C8F93C | a6; - x21 = x20 ^ xB35C94A6; - *out3 ^= x21; - - x30 = a6 & x61C8F93C; - x31 = x30 ^ xB35C94A6; - *out4 ^= x31; -} - -__device__ static void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x77777777, x77770000, x22225555, x11116666, x1F1F6F6F; - u32 x70700000, x43433333, x00430033, x55557777, x55167744, x5A19784B; - u32 x5A1987B4, x7A3BD7F5, x003B00F5, x221955A0, x05050707, x271C52A7; - u32 x2A2A82A0, x6969B193, x1FE06F90, x16804E00, xE97FB1FF; - u32 x43403302, x35CAED30, x37DEFFB7, x349ECCB5, x0B01234A; - u32 x101884B4, x0FF8EB24, x41413333, x4FF9FB37, x4FC2FBC2; - u32 x22222222, x16BCEE97, x0F080B04, x19B4E593; - u32 x5C5C5C5C, x4448184C, x2DDABE71, x6992A63D; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x77777777 = a1 | a3; - x77770000 = x77777777 & ~a6; - x22225555 = a1 ^ x77770000; - x11116666 = a3 ^ x22225555; - x1F1F6F6F = a4 | x11116666; - - x70700000 = x77770000 & ~a4; - x43433333 = a3 ^ x70700000; - x00430033 = a5 & x43433333; - x55557777 = a1 | x11116666; - x55167744 = x00430033 ^ x55557777; - x5A19784B = a4 ^ x55167744; - - x5A1987B4 = a6 ^ x5A19784B; - x7A3BD7F5 = x22225555 | x5A1987B4; - x003B00F5 = a5 & x7A3BD7F5; - x221955A0 = x22225555 ^ x003B00F5; - x05050707 = a4 & x55557777; - x271C52A7 = x221955A0 ^ x05050707; - - x2A2A82A0 = x7A3BD7F5 & ~a1; - x6969B193 = x43433333 ^ x2A2A82A0; - x1FE06F90 = a5 ^ x1F1F6F6F; - x16804E00 = x1FE06F90 & ~x6969B193; - xE97FB1FF = ~x16804E00; - x20 = xE97FB1FF & ~a2; - x21 = x20 ^ x5A19784B; - *out3 ^= x21; - - x43403302 = x43433333 & ~x003B00F5; - x35CAED30 = x2A2A82A0 ^ x1FE06F90; - x37DEFFB7 = 
x271C52A7 | x35CAED30; - x349ECCB5 = x37DEFFB7 & ~x43403302; - x0B01234A = x1F1F6F6F & ~x349ECCB5; - - x101884B4 = x5A1987B4 & x349ECCB5; - x0FF8EB24 = x1FE06F90 ^ x101884B4; - x41413333 = x43433333 & x55557777; - x4FF9FB37 = x0FF8EB24 | x41413333; - x4FC2FBC2 = x003B00F5 ^ x4FF9FB37; - x30 = x4FC2FBC2 & a2; - x31 = x30 ^ x271C52A7; - *out4 ^= x31; - - x22222222 = a1 ^ x77777777; - x16BCEE97 = x349ECCB5 ^ x22222222; - x0F080B04 = a4 & x0FF8EB24; - x19B4E593 = x16BCEE97 ^ x0F080B04; - x00 = x0B01234A | a2; - x01 = x00 ^ x19B4E593; - *out1 ^= x01; - - x5C5C5C5C = x1F1F6F6F ^ x43433333; - x4448184C = x5C5C5C5C & ~x19B4E593; - x2DDABE71 = x22225555 ^ x0FF8EB24; - x6992A63D = x4448184C ^ x2DDABE71; - x10 = x1F1F6F6F & a2; - x11 = x10 ^ x6992A63D; - *out2 ^= x11; -} - -__device__ static void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x33CC33CC; - u32 x3333FFFF, x11115555, x22DD6699, x22DD9966, x00220099; - u32 x00551144, x33662277, x5A5A5A5A, x7B7E7A7F, x59A31CE6; - u32 x09030C06, x09030000, x336622FF, x3A6522FF; - u32 x484D494C, x0000B6B3, x0F0FB9BC, x00FC00F9, x0FFFB9FD; - u32 x5DF75DF7, x116600F7, x1E69B94B, x1668B94B; - u32 x7B7B7B7B, x411E5984, x1FFFFDFD, x5EE1A479; - u32 x3CB4DFD2, x004B002D, xB7B2B6B3, xCCC9CDC8, xCC82CDE5; - u32 x0055EEBB, x5A5AECE9, x0050ECA9, xC5CAC1CE, xC59A2D67; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x33CC33CC = a2 ^ a5; - - x3333FFFF = a2 | a6; - x11115555 = a1 & x3333FFFF; - x22DD6699 = x33CC33CC ^ x11115555; - x22DD9966 = a6 ^ x22DD6699; - x00220099 = a5 & ~x22DD9966; - - x00551144 = a1 & x22DD9966; - x33662277 = a2 ^ x00551144; - x5A5A5A5A = a1 ^ a3; - x7B7E7A7F = x33662277 | x5A5A5A5A; - x59A31CE6 = x22DD6699 ^ x7B7E7A7F; - - x09030C06 = a3 & x59A31CE6; - x09030000 = x09030C06 & ~a6; - x336622FF = x00220099 | x33662277; - x3A6522FF = x09030000 ^ x336622FF; - x30 = x3A6522FF & a4; - x31 = x30 ^ x59A31CE6; - *out4 ^= x31; - - 
x484D494C = a2 ^ x7B7E7A7F; - x0000B6B3 = a6 & ~x484D494C; - x0F0FB9BC = a3 ^ x0000B6B3; - x00FC00F9 = a5 & ~x09030C06; - x0FFFB9FD = x0F0FB9BC | x00FC00F9; - - x5DF75DF7 = a1 | x59A31CE6; - x116600F7 = x336622FF & x5DF75DF7; - x1E69B94B = x0F0FB9BC ^ x116600F7; - x1668B94B = x1E69B94B & ~x09030000; - x20 = x00220099 | a4; - x21 = x20 ^ x1668B94B; - *out3 ^= x21; - - x7B7B7B7B = a2 | x5A5A5A5A; - x411E5984 = x3A6522FF ^ x7B7B7B7B; - x1FFFFDFD = x11115555 | x0FFFB9FD; - x5EE1A479 = x411E5984 ^ x1FFFFDFD; - - x3CB4DFD2 = x22DD6699 ^ x1E69B94B; - x004B002D = a5 & ~x3CB4DFD2; - xB7B2B6B3 = ~x484D494C; - xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3; - xCC82CDE5 = x004B002D ^ xCCC9CDC8; - x10 = xCC82CDE5 & ~a4; - x11 = x10 ^ x5EE1A479; - *out2 ^= x11; - - x0055EEBB = a6 ^ x00551144; - x5A5AECE9 = a1 ^ x0F0FB9BC; - x0050ECA9 = x0055EEBB & x5A5AECE9; - xC5CAC1CE = x09030C06 ^ xCCC9CDC8; - xC59A2D67 = x0050ECA9 ^ xC5CAC1CE; - x00 = x0FFFB9FD & ~a4; - x01 = x00 ^ xC59A2D67; - *out1 ^= x01; -} - -__device__ static void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x0FF00FF0, x3CC33CC3, x00003CC3, x0F000F00, x5A555A55, x00001841; - u32 x00000F00, x33333C33, x7B777E77, x0FF0F00F, x74878E78; - u32 x003C003C, x5A7D5A7D, x333300F0, x694E5A8D; - u32 x0FF0CCCC, x000F0303, x5A505854, x33CC000F, x699C585B; - u32 x7F878F78, x21101013, x7F979F7B, x30030CC0, x4F9493BB; - u32 x6F9CDBFB, x0000DBFB, x00005151, x26DAC936, x26DA9867; - u32 x27DA9877, x27DA438C, x2625C9C9, x27FFCBCD; - u32 x27FF1036, x27FF103E, xB06B6C44, x97947C7A; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x0FF00FF0 = a4 ^ a5; - x3CC33CC3 = a3 ^ x0FF00FF0; - x00003CC3 = a6 & x3CC33CC3; - x0F000F00 = a4 & x0FF00FF0; - x5A555A55 = a2 ^ x0F000F00; - x00001841 = x00003CC3 & x5A555A55; - - x00000F00 = a6 & x0F000F00; - x33333C33 = a3 ^ x00000F00; - x7B777E77 = x5A555A55 | x33333C33; - x0FF0F00F = a6 ^ x0FF00FF0; - x74878E78 = x7B777E77 ^ 
x0FF0F00F; - x30 = a1 & ~x00001841; - x31 = x30 ^ x74878E78; - *out4 ^= x31; - - x003C003C = a5 & ~x3CC33CC3; - x5A7D5A7D = x5A555A55 | x003C003C; - x333300F0 = x00003CC3 ^ x33333C33; - x694E5A8D = x5A7D5A7D ^ x333300F0; - - x0FF0CCCC = x00003CC3 ^ x0FF0F00F; - x000F0303 = a4 & ~x0FF0CCCC; - x5A505854 = x5A555A55 & ~x000F0303; - x33CC000F = a5 ^ x333300F0; - x699C585B = x5A505854 ^ x33CC000F; - - x7F878F78 = x0F000F00 | x74878E78; - x21101013 = a3 & x699C585B; - x7F979F7B = x7F878F78 | x21101013; - x30030CC0 = x3CC33CC3 & ~x0FF0F00F; - x4F9493BB = x7F979F7B ^ x30030CC0; - x00 = x4F9493BB & ~a1; - x01 = x00 ^ x694E5A8D; - *out1 ^= x01; - - x6F9CDBFB = x699C585B | x4F9493BB; - x0000DBFB = a6 & x6F9CDBFB; - x00005151 = a2 & x0000DBFB; - x26DAC936 = x694E5A8D ^ x4F9493BB; - x26DA9867 = x00005151 ^ x26DAC936; - - x27DA9877 = x21101013 | x26DA9867; - x27DA438C = x0000DBFB ^ x27DA9877; - x2625C9C9 = a5 ^ x26DAC936; - x27FFCBCD = x27DA438C | x2625C9C9; - x20 = x27FFCBCD & a1; - x21 = x20 ^ x699C585B; - *out3 ^= x21; - - x27FF1036 = x0000DBFB ^ x27FFCBCD; - x27FF103E = x003C003C | x27FF1036; - xB06B6C44 = ~x4F9493BB; - x97947C7A = x27FF103E ^ xB06B6C44; - x10 = x97947C7A & ~a1; - x11 = x10 ^ x26DA9867; - *out2 ^= x11; -} - -__device__ static void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, u32 *out1, u32 *out2, u32 *out3, u32 *out4) -{ - u32 x0C0C0C0C, x0000F0F0, x00FFF00F, x00555005, x00515001; - u32 x33000330, x77555775, x30303030, x3030CFCF, x30104745, x30555745; - u32 xFF000FF0, xCF1048B5, x080A080A, xC71A40BF, xCB164CB3; - u32 x9E4319E6, x000019E6, xF429738C, xF4296A6A, xC729695A; - u32 xC47C3D2F, xF77F3F3F, x9E43E619, x693CD926; - u32 xF719A695, xF4FF73FF, x03E6D56A, x56B3803F; - u32 xF700A600, x61008000, x03B7856B, x62B7056B; - u32 x00, x01, x10, x11, x20, x21, x30, x31; - - x0C0C0C0C = a3 & ~a2; - x0000F0F0 = a5 & ~a3; - x00FFF00F = a4 ^ x0000F0F0; - x00555005 = a1 & x00FFF00F; - x00515001 = x00555005 & ~x0C0C0C0C; - - 
x33000330 = a2 & ~x00FFF00F; - x77555775 = a1 | x33000330; - x30303030 = a2 & ~a3; - x3030CFCF = a5 ^ x30303030; - x30104745 = x77555775 & x3030CFCF; - x30555745 = x00555005 | x30104745; - - xFF000FF0 = ~x00FFF00F; - xCF1048B5 = x30104745 ^ xFF000FF0; - x080A080A = a3 & ~x77555775; - xC71A40BF = xCF1048B5 ^ x080A080A; - xCB164CB3 = x0C0C0C0C ^ xC71A40BF; - x10 = x00515001 | a6; - x11 = x10 ^ xCB164CB3; - *out2 ^= x11; - - x9E4319E6 = a1 ^ xCB164CB3; - x000019E6 = a5 & x9E4319E6; - xF429738C = a2 ^ xC71A40BF; - xF4296A6A = x000019E6 ^ xF429738C; - xC729695A = x33000330 ^ xF4296A6A; - - xC47C3D2F = x30555745 ^ xF4296A6A; - xF77F3F3F = a2 | xC47C3D2F; - x9E43E619 = a5 ^ x9E4319E6; - x693CD926 = xF77F3F3F ^ x9E43E619; - x20 = x30555745 & a6; - x21 = x20 ^ x693CD926; - *out3 ^= x21; - - xF719A695 = x3030CFCF ^ xC729695A; - xF4FF73FF = a4 | xF429738C; - x03E6D56A = xF719A695 ^ xF4FF73FF; - x56B3803F = a1 ^ x03E6D56A; - x30 = x56B3803F & a6; - x31 = x30 ^ xC729695A; - *out4 ^= x31; - - xF700A600 = xF719A695 & ~a4; - x61008000 = x693CD926 & xF700A600; - x03B7856B = x00515001 ^ x03E6D56A; - x62B7056B = x61008000 ^ x03B7856B; - x00 = x62B7056B | a6; - x01 = x00 ^ xC729695A; - *out1 ^= x01; -} - -#endif - -#define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; } -#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = 
K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; } -#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; } -#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; } -#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; } -#define KEYSET12 { k00 = K21; k01 
= K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; } -#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; } -#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; } -#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; 
k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; } -#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; } -#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; } -#define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; } -#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = 
K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; } -#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; } -#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; } -#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 
= K55; k47 = K34; } - -__device__ static void DES (const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 &D00, u32 &D01, u32 &D02, u32 &D03, u32 &D04, u32 &D05, u32 &D06, u32 &D07, u32 &D08, u32 &D09, u32 &D10, u32 &D11, u32 &D12, u32 &D13, u32 &D14, u32 &D15, u32 &D16, u32 &D17, u32 &D18, u32 &D19, u32 &D20, u32 &D21, u32 &D22, u32 &D23, u32 &D24, u32 &D25, u32 &D26, u32 &D27, u32 &D28, u32 &D29, u32 &D30, u32 &D31, u32 &D32, u32 &D33, u32 &D34, u32 &D35, u32 &D36, u32 &D37, u32 &D38, u32 &D39, u32 &D40, u32 &D41, u32 &D42, u32 &D43, u32 &D44, u32 &D45, u32 &D46, u32 &D47, u32 &D48, u32 &D49, u32 &D50, u32 &D51, u32 &D52, u32 &D53, u32 &D54, u32 &D55, u32 &D56, u32 &D57, u32 &D58, u32 &D59, u32 &D60, u32 &D61, u32 &D62, u32 &D63) -{ - KXX_DECL u32 k00, k01, k02, k03, k04, k05; - KXX_DECL u32 k06, k07, k08, k09, k10, k11; - KXX_DECL u32 k12, k13, k14, k15, k16, k17; - KXX_DECL u32 k18, k19, k20, k21, k22, k23; - KXX_DECL u32 k24, k25, k26, k27, k28, k29; - KXX_DECL u32 k30, k31, k32, k33, k34, k35; - KXX_DECL u32 k36, k37, k38, k39, k40, k41; - KXX_DECL u32 k42, k43, k44, k45, k46, k47; - - // this is essential - - #if __CUDA_ARCH__ >= 500 - #pragma unroll 1 - #else - #pragma unroll - 
#endif - - for (u32 i = 0; i < 2; i++) - { - if (i) KEYSET10 else KEYSET00 - - s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30); - s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17); - s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02); - s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18); - s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET11 else KEYSET01 - - s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62); - s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49); - s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34); - s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50); - s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET12 else KEYSET02 - - s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30); - s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17); - s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4(D43 ^ k18, D44 ^ k19, 
D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02); - s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18); - s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET13 else KEYSET03 - - s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62); - s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49); - s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34); - s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50); - s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET14 else KEYSET04 - - s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30); - s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17); - s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02); - s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18); - s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8(D59 ^ k42, D60 ^ k43, D61 ^ 
k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET15 else KEYSET05 - - s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62); - s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49); - s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34); - s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50); - s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - - if (i) KEYSET16 else KEYSET06 - - s1(D63 ^ k00, D32 ^ k01, D33 ^ k02, D34 ^ k03, D35 ^ k04, D36 ^ k05, &D08, &D16, &D22, &D30); - s2(D35 ^ k06, D36 ^ k07, D37 ^ k08, D38 ^ k09, D39 ^ k10, D40 ^ k11, &D12, &D27, &D01, &D17); - s3(D39 ^ k12, D40 ^ k13, D41 ^ k14, D42 ^ k15, D43 ^ k16, D44 ^ k17, &D23, &D15, &D29, &D05); - s4(D43 ^ k18, D44 ^ k19, D45 ^ k20, D46 ^ k21, D47 ^ k22, D48 ^ k23, &D25, &D19, &D09, &D00); - s5(D47 ^ k24, D48 ^ k25, D49 ^ k26, D50 ^ k27, D51 ^ k28, D52 ^ k29, &D07, &D13, &D24, &D02); - s6(D51 ^ k30, D52 ^ k31, D53 ^ k32, D54 ^ k33, D55 ^ k34, D56 ^ k35, &D03, &D28, &D10, &D18); - s7(D55 ^ k36, D56 ^ k37, D57 ^ k38, D58 ^ k39, D59 ^ k40, D60 ^ k41, &D31, &D11, &D21, &D06); - s8(D59 ^ k42, D60 ^ k43, D61 ^ k44, D62 ^ k45, D63 ^ k46, D32 ^ k47, &D04, &D26, &D14, &D20); - - if (i) KEYSET17 else KEYSET07 - - s1(D31 ^ k00, D00 ^ k01, D01 ^ k02, D02 ^ k03, D03 ^ k04, D04 ^ k05, &D40, &D48, &D54, &D62); - s2(D03 ^ k06, D04 ^ k07, D05 ^ k08, D06 ^ k09, D07 ^ k10, D08 ^ k11, &D44, &D59, &D33, &D49); - s3(D07 ^ k12, D08 ^ k13, D09 ^ k14, D10 ^ k15, D11 ^ k16, D12 ^ k17, &D55, &D47, &D61, &D37); - 
s4(D11 ^ k18, D12 ^ k19, D13 ^ k20, D14 ^ k21, D15 ^ k22, D16 ^ k23, &D57, &D51, &D41, &D32); - s5(D15 ^ k24, D16 ^ k25, D17 ^ k26, D18 ^ k27, D19 ^ k28, D20 ^ k29, &D39, &D45, &D56, &D34); - s6(D19 ^ k30, D20 ^ k31, D21 ^ k32, D22 ^ k33, D23 ^ k34, D24 ^ k35, &D35, &D60, &D42, &D50); - s7(D23 ^ k36, D24 ^ k37, D25 ^ k38, D26 ^ k39, D27 ^ k40, D28 ^ k41, &D63, &D43, &D53, &D38); - s8(D27 ^ k42, D28 ^ k43, D29 ^ k44, D30 ^ k45, D31 ^ k46, D00 ^ k47, &D36, &D58, &D46, &D52); - } -} - -__device__ static void transpose32c (u32 data[32]) -{ - #define swap(x,y,j,m) \ - t = ((x) ^ ((y) >> (j))) & (m); \ - (x) = (x) ^ t; \ - (y) = (y) ^ (t << (j)); - - u32 t; - - swap (data[ 0], data[16], 16, 0x0000ffff); - swap (data[ 1], data[17], 16, 0x0000ffff); - swap (data[ 2], data[18], 16, 0x0000ffff); - swap (data[ 3], data[19], 16, 0x0000ffff); - swap (data[ 4], data[20], 16, 0x0000ffff); - swap (data[ 5], data[21], 16, 0x0000ffff); - swap (data[ 6], data[22], 16, 0x0000ffff); - swap (data[ 7], data[23], 16, 0x0000ffff); - swap (data[ 8], data[24], 16, 0x0000ffff); - swap (data[ 9], data[25], 16, 0x0000ffff); - swap (data[10], data[26], 16, 0x0000ffff); - swap (data[11], data[27], 16, 0x0000ffff); - swap (data[12], data[28], 16, 0x0000ffff); - swap (data[13], data[29], 16, 0x0000ffff); - swap (data[14], data[30], 16, 0x0000ffff); - swap (data[15], data[31], 16, 0x0000ffff); - swap (data[ 0], data[ 8], 8, 0x00ff00ff); - swap (data[ 1], data[ 9], 8, 0x00ff00ff); - swap (data[ 2], data[10], 8, 0x00ff00ff); - swap (data[ 3], data[11], 8, 0x00ff00ff); - swap (data[ 4], data[12], 8, 0x00ff00ff); - swap (data[ 5], data[13], 8, 0x00ff00ff); - swap (data[ 6], data[14], 8, 0x00ff00ff); - swap (data[ 7], data[15], 8, 0x00ff00ff); - swap (data[ 0], data[ 4], 4, 0x0f0f0f0f); - swap (data[ 1], data[ 5], 4, 0x0f0f0f0f); - swap (data[ 2], data[ 6], 4, 0x0f0f0f0f); - swap (data[ 3], data[ 7], 4, 0x0f0f0f0f); - swap (data[ 0], data[ 2], 2, 0x33333333); - swap (data[ 1], data[ 3], 2, 0x33333333); - 
swap (data[ 0], data[ 1], 1, 0x55555555); - swap (data[ 2], data[ 3], 1, 0x55555555); - swap (data[ 4], data[ 6], 2, 0x33333333); - swap (data[ 5], data[ 7], 2, 0x33333333); - swap (data[ 4], data[ 5], 1, 0x55555555); - swap (data[ 6], data[ 7], 1, 0x55555555); - swap (data[ 8], data[12], 4, 0x0f0f0f0f); - swap (data[ 9], data[13], 4, 0x0f0f0f0f); - swap (data[10], data[14], 4, 0x0f0f0f0f); - swap (data[11], data[15], 4, 0x0f0f0f0f); - swap (data[ 8], data[10], 2, 0x33333333); - swap (data[ 9], data[11], 2, 0x33333333); - swap (data[ 8], data[ 9], 1, 0x55555555); - swap (data[10], data[11], 1, 0x55555555); - swap (data[12], data[14], 2, 0x33333333); - swap (data[13], data[15], 2, 0x33333333); - swap (data[12], data[13], 1, 0x55555555); - swap (data[14], data[15], 1, 0x55555555); - swap (data[16], data[24], 8, 0x00ff00ff); - swap (data[17], data[25], 8, 0x00ff00ff); - swap (data[18], data[26], 8, 0x00ff00ff); - swap (data[19], data[27], 8, 0x00ff00ff); - swap (data[20], data[28], 8, 0x00ff00ff); - swap (data[21], data[29], 8, 0x00ff00ff); - swap (data[22], data[30], 8, 0x00ff00ff); - swap (data[23], data[31], 8, 0x00ff00ff); - swap (data[16], data[20], 4, 0x0f0f0f0f); - swap (data[17], data[21], 4, 0x0f0f0f0f); - swap (data[18], data[22], 4, 0x0f0f0f0f); - swap (data[19], data[23], 4, 0x0f0f0f0f); - swap (data[16], data[18], 2, 0x33333333); - swap (data[17], data[19], 2, 0x33333333); - swap (data[16], data[17], 1, 0x55555555); - swap (data[18], data[19], 1, 0x55555555); - swap (data[20], data[22], 2, 0x33333333); - swap (data[21], data[23], 2, 0x33333333); - swap (data[20], data[21], 1, 0x55555555); - swap (data[22], data[23], 1, 0x55555555); - swap (data[24], data[28], 4, 0x0f0f0f0f); - swap (data[25], data[29], 4, 0x0f0f0f0f); - swap (data[26], data[30], 4, 0x0f0f0f0f); - swap (data[27], data[31], 4, 0x0f0f0f0f); - swap (data[24], data[26], 2, 0x33333333); - swap (data[25], data[27], 2, 0x33333333); - swap (data[24], data[25], 1, 0x55555555); - swap (data[26], 
data[27], 1, 0x55555555); - swap (data[28], data[30], 2, 0x33333333); - swap (data[29], data[31], 2, 0x33333333); - swap (data[28], data[29], 1, 0x55555555); - swap (data[30], data[31], 1, 0x55555555); -} - -__device__ static void m03000m (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * keys - */ - - const u32 w0s = pws[gid].i[0]; - const u32 w1s = pws[gid].i[1]; - - const u32 K00 = -((w0s >> ( 0 + 7)) & 1); - const u32 K01 = -((w0s >> ( 0 + 6)) & 1); - const u32 K02 = -((w0s >> ( 0 + 5)) & 1); - const u32 K03 = -((w0s >> ( 0 + 4)) & 1); - const u32 K04 = -((w0s >> ( 0 + 3)) & 1); - const u32 K05 = -((w0s >> ( 0 + 2)) & 1); - const u32 K06 = -((w0s >> ( 0 + 1)) & 1); - const u32 K07 = -((w0s >> ( 0 + 0)) & 1); - const u32 K08 = -((w0s >> ( 8 + 7)) & 1); - const u32 K09 = -((w0s >> ( 8 + 6)) & 1); - const u32 K10 = -((w0s >> ( 8 + 5)) & 1); - const u32 K11 = -((w0s >> ( 8 + 4)) & 1); - const u32 K12 = -((w0s >> ( 8 + 3)) & 1); - const u32 K13 = -((w0s >> ( 8 + 2)) & 1); - const u32 K14 = -((w0s >> ( 8 + 1)) & 1); - const u32 K15 = -((w0s >> ( 8 + 0)) & 1); - const u32 K16 = -((w0s >> (16 + 7)) & 1); - const u32 K17 = -((w0s >> (16 + 6)) & 1); - const u32 K18 = -((w0s >> (16 + 5)) 
& 1); - const u32 K19 = -((w0s >> (16 + 4)) & 1); - const u32 K20 = -((w0s >> (16 + 3)) & 1); - const u32 K21 = -((w0s >> (16 + 2)) & 1); - const u32 K22 = -((w0s >> (16 + 1)) & 1); - const u32 K23 = -((w0s >> (16 + 0)) & 1); - const u32 K24 = -((w0s >> (24 + 7)) & 1); - const u32 K25 = -((w0s >> (24 + 6)) & 1); - const u32 K26 = -((w0s >> (24 + 5)) & 1); - const u32 K27 = -((w0s >> (24 + 4)) & 1); - const u32 K28 = -((w0s >> (24 + 3)) & 1); - const u32 K29 = -((w0s >> (24 + 2)) & 1); - const u32 K30 = -((w0s >> (24 + 1)) & 1); - const u32 K31 = -((w0s >> (24 + 0)) & 1); - const u32 K32 = -((w1s >> ( 0 + 7)) & 1); - const u32 K33 = -((w1s >> ( 0 + 6)) & 1); - const u32 K34 = -((w1s >> ( 0 + 5)) & 1); - const u32 K35 = -((w1s >> ( 0 + 4)) & 1); - const u32 K36 = -((w1s >> ( 0 + 3)) & 1); - const u32 K37 = -((w1s >> ( 0 + 2)) & 1); - const u32 K38 = -((w1s >> ( 0 + 1)) & 1); - const u32 K39 = -((w1s >> ( 0 + 0)) & 1); - const u32 K40 = -((w1s >> ( 8 + 7)) & 1); - const u32 K41 = -((w1s >> ( 8 + 6)) & 1); - const u32 K42 = -((w1s >> ( 8 + 5)) & 1); - const u32 K43 = -((w1s >> ( 8 + 4)) & 1); - const u32 K44 = -((w1s >> ( 8 + 3)) & 1); - const u32 K45 = -((w1s >> ( 8 + 2)) & 1); - const u32 K46 = -((w1s >> ( 8 + 1)) & 1); - const u32 K47 = -((w1s >> ( 8 + 0)) & 1); - const u32 K48 = -((w1s >> (16 + 7)) & 1); - const u32 K49 = -((w1s >> (16 + 6)) & 1); - const u32 K50 = -((w1s >> (16 + 5)) & 1); - const u32 K51 = -((w1s >> (16 + 4)) & 1); - const u32 K52 = -((w1s >> (16 + 3)) & 1); - const u32 K53 = -((w1s >> (16 + 2)) & 1); - const u32 K54 = -((w1s >> (16 + 1)) & 1); - const u32 K55 = -((w1s >> (16 + 0)) & 1); - - /** - * loop - */ - - const u32 bf_loops = bfs_cnt; - - for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++) - { - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - 
u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - u32 k28 = K28; - u32 k29 = K29; - u32 k30 = K30; - u32 k31 = K31; - - k00 |= c_tm[pc_pos].b[ 0]; - k01 |= c_tm[pc_pos].b[ 1]; - k02 |= c_tm[pc_pos].b[ 2]; - k03 |= c_tm[pc_pos].b[ 3]; - k04 |= c_tm[pc_pos].b[ 4]; - k05 |= c_tm[pc_pos].b[ 5]; - k06 |= c_tm[pc_pos].b[ 6]; - k07 |= c_tm[pc_pos].b[ 7]; - k08 |= c_tm[pc_pos].b[ 8]; - k09 |= c_tm[pc_pos].b[ 9]; - k10 |= c_tm[pc_pos].b[10]; - k11 |= c_tm[pc_pos].b[11]; - k12 |= c_tm[pc_pos].b[12]; - k13 |= c_tm[pc_pos].b[13]; - k14 |= c_tm[pc_pos].b[14]; - k15 |= c_tm[pc_pos].b[15]; - k16 |= c_tm[pc_pos].b[16]; - k17 |= c_tm[pc_pos].b[17]; - k18 |= c_tm[pc_pos].b[18]; - k19 |= c_tm[pc_pos].b[19]; - k20 |= c_tm[pc_pos].b[20]; - k21 |= c_tm[pc_pos].b[21]; - k22 |= c_tm[pc_pos].b[22]; - k23 |= c_tm[pc_pos].b[23]; - k24 |= c_tm[pc_pos].b[24]; - k25 |= c_tm[pc_pos].b[25]; - k26 |= c_tm[pc_pos].b[26]; - k27 |= c_tm[pc_pos].b[27]; - k28 |= c_tm[pc_pos].b[28]; - k29 |= c_tm[pc_pos].b[29]; - k30 |= c_tm[pc_pos].b[30]; - k31 |= c_tm[pc_pos].b[31]; - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0xffffffff; - u32 D04 = 0; - u32 D05 = 0xffffffff; - u32 D06 = 0xffffffff; - u32 D07 = 0xffffffff; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0xffffffff; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0xffffffff; - u32 D17 = 0xffffffff; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0xffffffff; - u32 D23 = 0; - u32 D24 = 0xffffffff; - u32 D25 = 0; - u32 D26 = 0xffffffff; - u32 D27 = 0; - u32 D28 = 0xffffffff; - u32 D29 = 0xffffffff; - u32 D30 = 0xffffffff; - u32 D31 = 0xffffffff; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 
0xffffffff; - u32 D41 = 0xffffffff; - u32 D42 = 0xffffffff; - u32 D43 = 0; - u32 D44 = 0xffffffff; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0xffffffff; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0xffffffff; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0xffffffff; - u32 D62 = 0xffffffff; - u32 D63 = 0xffffffff; - - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 - ); - - u32 out[64]; - - out[ 0] = D00; - out[ 1] = D01; - out[ 2] = D02; - out[ 3] = D03; - out[ 4] = D04; - out[ 5] = D05; - out[ 6] = D06; - out[ 7] = D07; - out[ 8] = D08; - out[ 9] = D09; - out[10] = D10; - out[11] = D11; - out[12] = D12; - out[13] = D13; - out[14] = D14; - out[15] = D15; - out[16] = D16; - out[17] = D17; - out[18] = D18; - out[19] = D19; - out[20] = D20; - out[21] = D21; - out[22] = D22; - out[23] = D23; - out[24] = D24; - out[25] = D25; - out[26] = D26; - out[27] = D27; - out[28] = D28; - out[29] = D29; - out[30] = D30; - out[31] = D31; - out[32] = D32; - out[33] = D33; - out[34] = D34; - out[35] = D35; - out[36] = D36; - out[37] = D37; - out[38] = D38; - out[39] = D39; - out[40] = D40; - out[41] = D41; - out[42] = D42; - out[43] = D43; - out[44] = D44; - out[45] = D45; - out[46] = D46; - out[47] = D47; - out[48] = D48; - out[49] = D49; - out[50] = D50; - out[51] = D51; - out[52] = D52; - 
out[53] = D53; - out[54] = D54; - out[55] = D55; - out[56] = D56; - out[57] = D57; - out[58] = D58; - out[59] = D59; - out[60] = D60; - out[61] = D61; - out[62] = D62; - out[63] = D63; - - if (digests_cnt < 16) - { - for (u32 d = 0; d < digests_cnt; d++) - { - const u32 final_hash_pos = digests_offset + d; - - if (hashes_shown[final_hash_pos]) continue; - - u32 search[2]; - - search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0]; - search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1]; - - u32 tmpResult = 0; - - #pragma unroll - for (int i = 0; i < 32; i++) - { - const u32 b0 = -((search[0] >> i) & 1); - const u32 b1 = -((search[1] >> i) & 1); - - tmpResult |= out[ 0 + i] ^ b0; - tmpResult |= out[32 + i] ^ b1; - } - - if (tmpResult == 0xffffffff) continue; - - const u32 slice = 31 - __clz (~tmpResult); - - const u32x r0 = search[0]; - const u32x r1 = search[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } - else - { - u32 out0[32]; - u32 out1[32]; - - #pragma unroll - for (int i = 0; i < 32; i++) - { - out0[i] = out[ 0 + 31 - i]; - out1[i] = out[32 + 31 - i]; - } - - transpose32c (out0); - transpose32c (out1); - - #pragma unroll - for (int slice = 0; slice < 32; slice++) - { - const u32x r0 = out0[31 - slice]; - const u32x r1 = out1[31 - slice]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } - } -} - -__device__ static void m03000s (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - #define S00 s_S[ 0] - #define S01 s_S[ 1] - #define S02 s_S[ 2] - #define S03 s_S[ 3] - #define S04 s_S[ 4] - #define S05 s_S[ 5] - #define S06 s_S[ 6] - #define S07 s_S[ 7] - #define S08 s_S[ 8] - #define S09 s_S[ 9] - #define S10 s_S[10] - #define S11 s_S[11] - #define S12 s_S[12] - #define S13 s_S[13] - #define S14 s_S[14] - #define S15 s_S[15] - #define S16 s_S[16] - #define S17 s_S[17] - #define S18 s_S[18] - #define S19 s_S[19] - #define S20 s_S[20] - #define S21 s_S[21] - #define S22 s_S[22] - #define S23 s_S[23] - #define S24 s_S[24] - #define S25 s_S[25] - #define S26 s_S[26] - #define S27 s_S[27] - #define S28 s_S[28] - #define S29 s_S[29] - #define S30 s_S[30] - #define S31 s_S[31] - #define S32 s_S[32] - #define S33 s_S[33] - #define S34 s_S[34] - #define S35 s_S[35] - #define S36 s_S[36] - #define S37 s_S[37] - #define S38 s_S[38] - #define S39 s_S[39] - #define S40 s_S[40] - #define S41 s_S[41] - #define S42 s_S[42] - #define S43 s_S[43] - #define S44 s_S[44] - #define S45 s_S[45] - #define S46 s_S[46] - #define S47 s_S[47] - #define S48 s_S[48] - #define S49 s_S[49] - #define S50 s_S[50] - #define S51 s_S[51] - #define S52 s_S[52] - #define S53 s_S[53] - #define S54 s_S[54] - #define S55 s_S[55] - #define S56 s_S[56] - #define S57 s_S[57] - #define S58 s_S[58] - #define S59 s_S[59] - #define S60 s_S[60] - #define S61 s_S[61] - #define S62 s_S[62] - #define S63 s_S[63] - - /** - * keys - */ - - const u32 w0s = pws[gid].i[0]; - const u32 w1s = pws[gid].i[1]; - - const u32 K00 = -((w0s >> ( 0 + 7)) & 1); - const u32 K01 = -((w0s >> ( 0 + 6)) & 1); - const u32 K02 = -((w0s >> ( 0 + 5)) & 1); - const u32 K03 = -((w0s >> ( 0 + 4)) & 1); - const u32 K04 = -((w0s 
>> ( 0 + 3)) & 1); - const u32 K05 = -((w0s >> ( 0 + 2)) & 1); - const u32 K06 = -((w0s >> ( 0 + 1)) & 1); - const u32 K07 = -((w0s >> ( 0 + 0)) & 1); - const u32 K08 = -((w0s >> ( 8 + 7)) & 1); - const u32 K09 = -((w0s >> ( 8 + 6)) & 1); - const u32 K10 = -((w0s >> ( 8 + 5)) & 1); - const u32 K11 = -((w0s >> ( 8 + 4)) & 1); - const u32 K12 = -((w0s >> ( 8 + 3)) & 1); - const u32 K13 = -((w0s >> ( 8 + 2)) & 1); - const u32 K14 = -((w0s >> ( 8 + 1)) & 1); - const u32 K15 = -((w0s >> ( 8 + 0)) & 1); - const u32 K16 = -((w0s >> (16 + 7)) & 1); - const u32 K17 = -((w0s >> (16 + 6)) & 1); - const u32 K18 = -((w0s >> (16 + 5)) & 1); - const u32 K19 = -((w0s >> (16 + 4)) & 1); - const u32 K20 = -((w0s >> (16 + 3)) & 1); - const u32 K21 = -((w0s >> (16 + 2)) & 1); - const u32 K22 = -((w0s >> (16 + 1)) & 1); - const u32 K23 = -((w0s >> (16 + 0)) & 1); - const u32 K24 = -((w0s >> (24 + 7)) & 1); - const u32 K25 = -((w0s >> (24 + 6)) & 1); - const u32 K26 = -((w0s >> (24 + 5)) & 1); - const u32 K27 = -((w0s >> (24 + 4)) & 1); - const u32 K28 = -((w0s >> (24 + 3)) & 1); - const u32 K29 = -((w0s >> (24 + 2)) & 1); - const u32 K30 = -((w0s >> (24 + 1)) & 1); - const u32 K31 = -((w0s >> (24 + 0)) & 1); - const u32 K32 = -((w1s >> ( 0 + 7)) & 1); - const u32 K33 = -((w1s >> ( 0 + 6)) & 1); - const u32 K34 = -((w1s >> ( 0 + 5)) & 1); - const u32 K35 = -((w1s >> ( 0 + 4)) & 1); - const u32 K36 = -((w1s >> ( 0 + 3)) & 1); - const u32 K37 = -((w1s >> ( 0 + 2)) & 1); - const u32 K38 = -((w1s >> ( 0 + 1)) & 1); - const u32 K39 = -((w1s >> ( 0 + 0)) & 1); - const u32 K40 = -((w1s >> ( 8 + 7)) & 1); - const u32 K41 = -((w1s >> ( 8 + 6)) & 1); - const u32 K42 = -((w1s >> ( 8 + 5)) & 1); - const u32 K43 = -((w1s >> ( 8 + 4)) & 1); - const u32 K44 = -((w1s >> ( 8 + 3)) & 1); - const u32 K45 = -((w1s >> ( 8 + 2)) & 1); - const u32 K46 = -((w1s >> ( 8 + 1)) & 1); - const u32 K47 = -((w1s >> ( 8 + 0)) & 1); - const u32 K48 = -((w1s >> (16 + 7)) & 1); - const u32 K49 = -((w1s >> (16 + 6)) & 1); 
- const u32 K50 = -((w1s >> (16 + 5)) & 1); - const u32 K51 = -((w1s >> (16 + 4)) & 1); - const u32 K52 = -((w1s >> (16 + 3)) & 1); - const u32 K53 = -((w1s >> (16 + 2)) & 1); - const u32 K54 = -((w1s >> (16 + 1)) & 1); - const u32 K55 = -((w1s >> (16 + 0)) & 1); - - /** - * loop - */ - - const u32 bf_loops = bfs_cnt; - - for (u32 il_pos = 0, pc_pos = 0; il_pos < bf_loops; il_pos += 32, pc_pos++) - { - u32 k00 = K00; - u32 k01 = K01; - u32 k02 = K02; - u32 k03 = K03; - u32 k04 = K04; - u32 k05 = K05; - u32 k06 = K06; - u32 k07 = K07; - u32 k08 = K08; - u32 k09 = K09; - u32 k10 = K10; - u32 k11 = K11; - u32 k12 = K12; - u32 k13 = K13; - u32 k14 = K14; - u32 k15 = K15; - u32 k16 = K16; - u32 k17 = K17; - u32 k18 = K18; - u32 k19 = K19; - u32 k20 = K20; - u32 k21 = K21; - u32 k22 = K22; - u32 k23 = K23; - u32 k24 = K24; - u32 k25 = K25; - u32 k26 = K26; - u32 k27 = K27; - u32 k28 = K28; - u32 k29 = K29; - u32 k30 = K30; - u32 k31 = K31; - - k00 |= c_tm[pc_pos].b[ 0]; - k01 |= c_tm[pc_pos].b[ 1]; - k02 |= c_tm[pc_pos].b[ 2]; - k03 |= c_tm[pc_pos].b[ 3]; - k04 |= c_tm[pc_pos].b[ 4]; - k05 |= c_tm[pc_pos].b[ 5]; - k06 |= c_tm[pc_pos].b[ 6]; - k07 |= c_tm[pc_pos].b[ 7]; - k08 |= c_tm[pc_pos].b[ 8]; - k09 |= c_tm[pc_pos].b[ 9]; - k10 |= c_tm[pc_pos].b[10]; - k11 |= c_tm[pc_pos].b[11]; - k12 |= c_tm[pc_pos].b[12]; - k13 |= c_tm[pc_pos].b[13]; - k14 |= c_tm[pc_pos].b[14]; - k15 |= c_tm[pc_pos].b[15]; - k16 |= c_tm[pc_pos].b[16]; - k17 |= c_tm[pc_pos].b[17]; - k18 |= c_tm[pc_pos].b[18]; - k19 |= c_tm[pc_pos].b[19]; - k20 |= c_tm[pc_pos].b[20]; - k21 |= c_tm[pc_pos].b[21]; - k22 |= c_tm[pc_pos].b[22]; - k23 |= c_tm[pc_pos].b[23]; - k24 |= c_tm[pc_pos].b[24]; - k25 |= c_tm[pc_pos].b[25]; - k26 |= c_tm[pc_pos].b[26]; - k27 |= c_tm[pc_pos].b[27]; - k28 |= c_tm[pc_pos].b[28]; - k29 |= c_tm[pc_pos].b[29]; - k30 |= c_tm[pc_pos].b[30]; - k31 |= c_tm[pc_pos].b[31]; - - u32 D00 = 0; - u32 D01 = 0; - u32 D02 = 0; - u32 D03 = 0xffffffff; - u32 D04 = 0; - u32 D05 = 0xffffffff; - u32 D06 = 
0xffffffff; - u32 D07 = 0xffffffff; - u32 D08 = 0; - u32 D09 = 0; - u32 D10 = 0; - u32 D11 = 0; - u32 D12 = 0; - u32 D13 = 0xffffffff; - u32 D14 = 0; - u32 D15 = 0; - u32 D16 = 0xffffffff; - u32 D17 = 0xffffffff; - u32 D18 = 0; - u32 D19 = 0; - u32 D20 = 0; - u32 D21 = 0; - u32 D22 = 0xffffffff; - u32 D23 = 0; - u32 D24 = 0xffffffff; - u32 D25 = 0; - u32 D26 = 0xffffffff; - u32 D27 = 0; - u32 D28 = 0xffffffff; - u32 D29 = 0xffffffff; - u32 D30 = 0xffffffff; - u32 D31 = 0xffffffff; - u32 D32 = 0; - u32 D33 = 0; - u32 D34 = 0; - u32 D35 = 0; - u32 D36 = 0; - u32 D37 = 0; - u32 D38 = 0; - u32 D39 = 0; - u32 D40 = 0xffffffff; - u32 D41 = 0xffffffff; - u32 D42 = 0xffffffff; - u32 D43 = 0; - u32 D44 = 0xffffffff; - u32 D45 = 0; - u32 D46 = 0; - u32 D47 = 0; - u32 D48 = 0; - u32 D49 = 0; - u32 D50 = 0; - u32 D51 = 0; - u32 D52 = 0; - u32 D53 = 0; - u32 D54 = 0; - u32 D55 = 0xffffffff; - u32 D56 = 0; - u32 D57 = 0; - u32 D58 = 0xffffffff; - u32 D59 = 0; - u32 D60 = 0; - u32 D61 = 0xffffffff; - u32 D62 = 0xffffffff; - u32 D63 = 0xffffffff; - - DES - ( - k00, k01, k02, k03, k04, k05, k06, - k07, k08, k09, k10, k11, k12, k13, - k14, k15, k16, k17, k18, k19, k20, - k21, k22, k23, k24, k25, k26, k27, - k28, k29, k30, k31, K32, K33, K34, - K35, K36, K37, K38, K39, K40, K41, - K42, K43, K44, K45, K46, K47, K48, - K49, K50, K51, K52, K53, K54, K55, - D00, D01, D02, D03, D04, D05, D06, D07, - D08, D09, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - D32, D33, D34, D35, D36, D37, D38, D39, - D40, D41, D42, D43, D44, D45, D46, D47, - D48, D49, D50, D51, D52, D53, D54, D55, - D56, D57, D58, D59, D60, D61, D62, D63 - ); - - u32 tmpResult = 0; - - tmpResult |= D00 ^ S00; - tmpResult |= D01 ^ S01; - tmpResult |= D02 ^ S02; - tmpResult |= D03 ^ S03; - tmpResult |= D04 ^ S04; - tmpResult |= D05 ^ S05; - tmpResult |= D06 ^ S06; - tmpResult |= D07 ^ S07; - tmpResult |= D08 ^ S08; - tmpResult |= D09 ^ S09; - tmpResult |= D10 
^ S10; - tmpResult |= D11 ^ S11; - tmpResult |= D12 ^ S12; - tmpResult |= D13 ^ S13; - tmpResult |= D14 ^ S14; - tmpResult |= D15 ^ S15; - - if (tmpResult == 0xffffffff) continue; - - tmpResult |= D16 ^ S16; - tmpResult |= D17 ^ S17; - tmpResult |= D18 ^ S18; - tmpResult |= D19 ^ S19; - tmpResult |= D20 ^ S20; - tmpResult |= D21 ^ S21; - tmpResult |= D22 ^ S22; - tmpResult |= D23 ^ S23; - tmpResult |= D24 ^ S24; - tmpResult |= D25 ^ S25; - tmpResult |= D26 ^ S26; - tmpResult |= D27 ^ S27; - tmpResult |= D28 ^ S28; - tmpResult |= D29 ^ S29; - tmpResult |= D30 ^ S30; - tmpResult |= D31 ^ S31; - - if (tmpResult == 0xffffffff) continue; - - tmpResult |= D32 ^ S32; - tmpResult |= D33 ^ S33; - tmpResult |= D34 ^ S34; - tmpResult |= D35 ^ S35; - tmpResult |= D36 ^ S36; - tmpResult |= D37 ^ S37; - tmpResult |= D38 ^ S38; - tmpResult |= D39 ^ S39; - tmpResult |= D40 ^ S40; - tmpResult |= D41 ^ S41; - tmpResult |= D42 ^ S42; - tmpResult |= D43 ^ S43; - tmpResult |= D44 ^ S44; - tmpResult |= D45 ^ S45; - tmpResult |= D46 ^ S46; - tmpResult |= D47 ^ S47; - - if (tmpResult == 0xffffffff) continue; - - tmpResult |= D48 ^ S48; - tmpResult |= D49 ^ S49; - tmpResult |= D50 ^ S50; - tmpResult |= D51 ^ S51; - tmpResult |= D52 ^ S52; - tmpResult |= D53 ^ S53; - tmpResult |= D54 ^ S54; - tmpResult |= D55 ^ S55; - tmpResult |= D56 ^ S56; - tmpResult |= D57 ^ S57; - tmpResult |= D58 ^ S58; - tmpResult |= D59 ^ S59; - tmpResult |= D60 ^ S60; - tmpResult |= D61 ^ S61; - tmpResult |= D62 ^ S62; - tmpResult |= D63 ^ S63; - - if (tmpResult == 0xffffffff) continue; - - const u32 slice = 31 - __clz (~tmpResult); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_tb (pw_t *pws) -{ - // not used here, inlined code -} - -extern "C" __global__ void __launch_bounds__ (32, 1) m03000_tm (const u32 *d_bfs, bs_word_t *d_tbs) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - const u32 block = gid / 32; - const u32 slice = gid % 32; - - const 
u32 w0 = c_bfs[gid]; - - #pragma unroll - for (int i = 0; i < 32; i += 8) - { - atomicOr (&d_tbs[block].b[i + 0], (((w0 >> (i + 7)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 1], (((w0 >> (i + 6)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 2], (((w0 >> (i + 5)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 3], (((w0 >> (i + 4)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 4], (((w0 >> (i + 3)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 5], (((w0 >> (i + 2)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 6], (((w0 >> (i + 1)) & 1) << slice)); - atomicOr (&d_tbs[block].b[i + 7], (((w0 >> (i + 0)) & 1) << slice)); - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - const u32 s0 = digests_buf[digests_offset].digest_buf[0]; - const u32 s1 = digests_buf[digests_offset].digest_buf[1]; - - if (lid < 32) - { - s_S[lid] = -((s0 >> lid - 0) & 1); - } - else if (lid < 64) - { - s_S[lid] = -((s1 >> lid - 32) & 1); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03000m (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - const u32 s0 = digests_buf[digests_offset].digest_buf[0]; - const u32 s1 = digests_buf[digests_offset].digest_buf[1]; - - if (lid < 32) - { - s_S[lid] = -((s0 >> lid - 0) & 1); - } - else if (lid < 64) - { - s_S[lid] = -((s1 >> lid - 32) & 1); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03000s (pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m03000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03100_a0.cu b/nv/m03100_a0.cu deleted file mode 100644 index a1fd073..0000000 --- a/nv/m03100_a0.cu +++ /dev/null @@ -1,1063 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 
-#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 
0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 
0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 
0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 
0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 
0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 
0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, 
- 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - IP (r, l, tt); - - r = rotl32 (r, 3u); - l = rotl32 (l, 3u); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - l = rotl32 (l, 29u); - r = rotl32 (r, 29u); - - FP (r, l, tt); - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 
0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for 
(u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 salt_word_len = (salt_len + out_len) * 2; - - /** - * prepend salt - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = 0; - dst[13] = 0; - dst[14] = 0; - dst[15] = 0; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] = 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - Kc[13] = 
0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] = 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * 
loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 salt_word_len = (salt_len + out_len) * 2; - - /** - * prepend salt - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = 0; - dst[13] = 0; - dst[14] = 0; - dst[15] = 0; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] = 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - 
Kc[13] = 0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] = 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03100_a1.cu b/nv/m03100_a1.cu deleted file mode 100644 index b2149d7..0000000 --- a/nv/m03100_a1.cu +++ /dev/null @@ -1,1159 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 
0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 
0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 
0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 
0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 
0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 
0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 
0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - IP (r, l, tt); - - r = rotl32 (r, 3u); - l = rotl32 (l, 3u); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - l = rotl32 (l, 29u); - r = rotl32 (r, 29u); - - FP (r, l, tt); - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 
2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = 
(blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - 
*/ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 salt_word_len = (salt_len + pw_len) * 2; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - 
w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = 0; - dst[13] = 0; - dst[14] = 0; - dst[15] = 0; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] = 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - Kc[13] = 0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] = 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | 
((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - 
s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 salt_word_len = (salt_len + pw_len) * 2; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - 
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = 0; - dst[13] = 0; - dst[14] = 0; - dst[15] = 0; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] 
= 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - Kc[13] = 0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] = 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03100_a3.cu b/nv/m03100_a3.cu deleted file mode 100644 index 85af90f..0000000 --- a/nv/m03100_a3.cu +++ /dev/null @@ -1,1363 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 
0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 
0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 
0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 
0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 
0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 
0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 
0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - IP (r, l, tt); - - r = rotl32 (r, 3u); - l = rotl32 (l, 3u); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - l = rotl32 (l, 29u); - r = rotl32 (r, 29u); - - FP (r, l, tt); - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - 
#pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__shared__ u32 s_SPtrans[8][64]; - -__shared__ u32 s_skb[8][64]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m03100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - const u32 salt_word_len = (salt_len + pw_len) * 2; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = 
w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = w3_t[0]; - dst[13] = w3_t[1]; - dst[14] = w3_t[2]; - dst[15] = w3_t[3]; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] = 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - Kc[13] = 0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] = 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - 
const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m03100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - const u32 salt_word_len = 
(salt_len + pw_len) * 2; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x dst[16]; - - dst[ 0] = w0_t[0]; - dst[ 1] = w0_t[1]; - dst[ 2] = w0_t[2]; - dst[ 3] = w0_t[3]; - dst[ 4] = w1_t[0]; - dst[ 5] = w1_t[1]; - dst[ 6] = w1_t[2]; - dst[ 7] = w1_t[3]; - dst[ 8] = w2_t[0]; - dst[ 9] = w2_t[1]; - dst[10] = w2_t[2]; - dst[11] = w2_t[3]; - dst[12] = w3_t[0]; - dst[13] = w3_t[1]; - dst[14] = w3_t[2]; - dst[15] = w3_t[3]; - - /** - * precompute key1 since key is static: 0x0123456789abcdef - * plus LEFT_ROTATE by 2 - */ - - u32x Kc[16]; - - Kc[ 0] = 0x64649040; - Kc[ 1] = 0x14909858; - Kc[ 2] = 0xc4b44888; - Kc[ 3] = 0x9094e438; - Kc[ 4] = 0xd8a004f0; - Kc[ 5] = 0xa8f02810; - Kc[ 6] = 0xc84048d8; - Kc[ 7] = 0x68d804a8; - Kc[ 8] = 0x0490e40c; - Kc[ 9] = 0xac183024; - Kc[10] = 0x24c07c10; - Kc[11] = 0x8c88c038; - Kc[12] = 0xc048c824; - Kc[13] = 0x4c0470a8; - Kc[14] = 0x584020b4; - Kc[15] = 0x00742c4c; - - u32x Kd[16]; - - Kd[ 0] = 0xa42ce40c; - Kd[ 1] = 0x64689858; - Kd[ 2] = 0x484050b8; - Kd[ 3] = 0xe8184814; - Kd[ 4] = 0x405cc070; - Kd[ 5] = 0xa010784c; - Kd[ 6] 
= 0x6074a800; - Kd[ 7] = 0x80701c1c; - Kd[ 8] = 0x9cd49430; - Kd[ 9] = 0x4c8ce078; - Kd[10] = 0x5c18c088; - Kd[11] = 0x28a8a4c8; - Kd[12] = 0x3c180838; - Kd[13] = 0xb0b86c20; - Kd[14] = 0xac84a094; - Kd[15] = 0x4ce0c0c4; - - /** - * key1 (generate key) - */ - - u32x iv[2]; - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * key2 (generate hash) - */ - - _des_crypt_keysetup (iv[0], iv[1], Kc, Kd, s_skb); - - iv[0] = 0; - iv[1] = 0; - - for (u32 j = 0, k = 0; j < salt_word_len; j += 8, k++) - { - u32x data[2]; - - data[0] = ((dst[k] << 16) & 0xff000000) | ((dst[k] << 8) & 0x0000ff00); - data[1] = ((dst[k] >> 0) & 0xff000000) | ((dst[k] >> 8) & 0x0000ff00); - - data[0] ^= iv[0]; - data[1] ^= iv[1]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - } - - /** - * cmp - */ - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - 
s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - 
u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid 
>= gid_max) return; - - /** - * main - */ - - m03100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m03200.cu b/nv/m03200.cu deleted file mode 100644 index 90f7990..0000000 --- a/nv/m03200.cu +++ /dev/null @@ -1,895 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _BCRYPT_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -// http://www.schneier.com/code/constants.txt - -__device__ __constant__ u32 c_sbox0[256] = -{ - 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, - 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, - 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, - 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, - 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, - 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, - 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, - 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, - 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, - 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, - 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, - 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, - 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, - 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, - 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, - 0x61d809cc, 0xfb21a991, 0x487cac60, 
0x5dec8032, - 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, - 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, - 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, - 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, - 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, - 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, - 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, - 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, - 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, - 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, - 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, - 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, - 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, - 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, - 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, - 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, - 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, - 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, - 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, - 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, - 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, - 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, - 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, - 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, - 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, - 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, - 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, - 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, - 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, - 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, - 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, - 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, - 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, - 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, - 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, - 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, - 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, - 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, - 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, - 0x00250e2d, 0x2071b35e, 0x226800bb, 
0x57b8e0af, - 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, - 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, - 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, - 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, - 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, - 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, - 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, - 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a -}; - -__device__ __constant__ u32 c_sbox1[256] = -{ - 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, - 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, - 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, - 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, - 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, - 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, - 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, - 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, - 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, - 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, - 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, - 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, - 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, - 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, - 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, - 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, - 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, - 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, - 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, - 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, - 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, - 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, - 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, - 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, - 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, - 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, - 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, - 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, - 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, - 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, - 0xc6150eba, 0x94e2ea78, 
0xa5fc3c53, 0x1e0a2df4, - 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, - 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, - 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, - 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, - 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, - 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, - 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, - 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, - 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, - 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, - 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, - 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, - 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, - 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, - 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, - 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, - 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, - 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, - 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, - 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, - 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, - 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, - 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, - 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, - 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, - 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, - 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, - 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, - 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, - 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, - 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, - 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, - 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 -}; - -__device__ __constant__ u32 c_sbox2[256] = -{ - 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, - 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, - 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, - 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, - 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, - 0xbfbc09ec, 
0x03bd9785, 0x7fac6dd0, 0x31cb8504, - 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, - 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, - 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, - 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, - 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, - 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, - 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, - 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, - 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, - 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, - 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, - 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, - 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, - 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, - 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, - 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, - 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, - 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, - 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, - 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, - 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, - 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, - 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, - 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, - 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, - 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, - 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, - 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, - 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, - 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, - 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, - 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, - 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, - 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, - 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, - 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, - 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, - 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, - 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, - 0x5366f9c3, 
0xc8b38e74, 0xb475f255, 0x46fcd9b9, - 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, - 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, - 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, - 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, - 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, - 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, - 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, - 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, - 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, - 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, - 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, - 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, - 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, - 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, - 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, - 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, - 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, - 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 -}; - -__device__ __constant__ u32 c_sbox3[256] = -{ - 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, - 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, - 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, - 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, - 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, - 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, - 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, - 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, - 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, - 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, - 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, - 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, - 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, - 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, - 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, - 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, - 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, - 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, - 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, - 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, - 
0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, - 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, - 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, - 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, - 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, - 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, - 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, - 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, - 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, - 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, - 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, - 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, - 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, - 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, - 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, - 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, - 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, - 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, - 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, - 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, - 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, - 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, - 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, - 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, - 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, - 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, - 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, - 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, - 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, - 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, - 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, - 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, - 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, - 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, - 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, - 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, - 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, - 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, - 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, - 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, - 
0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, - 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, - 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, - 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 -}; - -__device__ __constant__ u32 c_pbox[18] = -{ - 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, - 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, - 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, - 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, - 0x9216d5d9, 0x8979fb1b -}; - -#define BF_ROUND(L,R,N) \ -{ \ - u32x tmp; \ - \ - tmp = S0[((L) >> 24) & 0xff]; \ - tmp += S1[((L) >> 16) & 0xff]; \ - tmp ^= S2[((L) >> 8) & 0xff]; \ - tmp += S3[((L) >> 0) & 0xff]; \ - \ - (R) ^= tmp ^ P[(N)]; \ -} - -#define BF_ENCRYPT(L,R) \ -{ \ - L ^= P[0]; \ - BF_ROUND (L, R, 1); \ - BF_ROUND (R, L, 2); \ - BF_ROUND (L, R, 3); \ - BF_ROUND (R, L, 4); \ - BF_ROUND (L, R, 5); \ - BF_ROUND (R, L, 6); \ - BF_ROUND (L, R, 7); \ - BF_ROUND (R, L, 8); \ - BF_ROUND (L, R, 9); \ - BF_ROUND (R, L, 10); \ - BF_ROUND (L, R, 11); \ - BF_ROUND (R, L, 12); \ - BF_ROUND (L, R, 13); \ - BF_ROUND (R, L, 14); \ - BF_ROUND (L, R, 15); \ - BF_ROUND (R, L, 16); \ - tmp = R; \ - R = L; \ - L = tmp ^ P[17]; \ -} - -__device__ static void expand_key (u32x E[34], const u32x W[16], const u32 len) -{ - u8 *E_cur = (u8 *) E; - u8 *E_stop = E_cur + 72; - - while (E_cur < E_stop) - { - u8 *W_cur = (u8 *) W; - u8 *W_stop = W_cur + len; - - while (W_cur < W_stop) - { - *E_cur++ = *W_cur++; - } - - *E_cur++ = 0; - } -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m03200_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bcrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32 pw_len = pws[gid].pw_len; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - u32x E[34]; - - expand_key (E, w, pw_len); - - E[ 0] = swap_workaround (E[ 0]); - E[ 1] = swap_workaround (E[ 1]); - E[ 2] = swap_workaround (E[ 2]); - E[ 3] = swap_workaround (E[ 3]); - E[ 4] = swap_workaround (E[ 4]); - E[ 5] = swap_workaround (E[ 5]); - E[ 6] = swap_workaround (E[ 6]); - E[ 7] = swap_workaround (E[ 7]); - E[ 8] = swap_workaround (E[ 8]); - E[ 9] = swap_workaround (E[ 9]); - E[10] = swap_workaround (E[10]); - E[11] = swap_workaround (E[11]); - E[12] = swap_workaround (E[12]); - E[13] = swap_workaround (E[13]); - E[14] = swap_workaround (E[14]); - E[15] = swap_workaround (E[15]); - E[16] = swap_workaround (E[16]); - E[17] = swap_workaround (E[17]); - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * do the key setup - */ - - __shared__ u32x S0_all[8][256]; - __shared__ u32x S1_all[8][256]; - __shared__ u32x S2_all[8][256]; 
- __shared__ u32x S3_all[8][256]; - - u32x *S0 = S0_all[lid]; - u32x *S1 = S1_all[lid]; - u32x *S2 = S2_all[lid]; - u32x *S3 = S3_all[lid]; - - // initstate - - u32x P[18]; - - for (u32 i = 0; i < 18; i++) - { - P[i] = c_pbox[i]; - } - - for (u32 i = 0; i < 256; i++) - { - S0[i] = c_sbox0[i]; - S1[i] = c_sbox1[i]; - S2[i] = c_sbox2[i]; - S3[i] = c_sbox3[i]; - } - - // expandstate - - for (u32 i = 0; i < 18; i++) - { - P[i] ^= E[i]; - } - - u32 tmp; - - u32 L0 = 0; - u32 R0 = 0; - - for (u32 i = 0; i < 18; i += 2) - { - L0 ^= salt_buf[(i & 2) + 0]; - R0 ^= salt_buf[(i & 2) + 1]; - - BF_ENCRYPT (L0, R0); - - P[i + 0] = L0; - P[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - L0 ^= salt_buf[2]; - R0 ^= salt_buf[3]; - - BF_ENCRYPT (L0, R0); - - S0[i + 0] = L0; - S0[i + 1] = R0; - - L0 ^= salt_buf[0]; - R0 ^= salt_buf[1]; - - BF_ENCRYPT (L0, R0); - - S0[i + 2] = L0; - S0[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - L0 ^= salt_buf[2]; - R0 ^= salt_buf[3]; - - BF_ENCRYPT (L0, R0); - - S1[i + 0] = L0; - S1[i + 1] = R0; - - L0 ^= salt_buf[0]; - R0 ^= salt_buf[1]; - - BF_ENCRYPT (L0, R0); - - S1[i + 2] = L0; - S1[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - L0 ^= salt_buf[2]; - R0 ^= salt_buf[3]; - - BF_ENCRYPT (L0, R0); - - S2[i + 0] = L0; - S2[i + 1] = R0; - - L0 ^= salt_buf[0]; - R0 ^= salt_buf[1]; - - BF_ENCRYPT (L0, R0); - - S2[i + 2] = L0; - S2[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - L0 ^= salt_buf[2]; - R0 ^= salt_buf[3]; - - BF_ENCRYPT (L0, R0); - - S3[i + 0] = L0; - S3[i + 1] = R0; - - L0 ^= salt_buf[0]; - R0 ^= salt_buf[1]; - - BF_ENCRYPT (L0, R0); - - S3[i + 2] = L0; - S3[i + 3] = R0; - } - - // store - - for (u32 i = 0; i < 18; i++) - { - tmps[gid].P[i] = P[i]; - } - - for (u32 i = 0; i < 256; i++) - { - tmps[gid].S0[i] = S0[i]; - tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; - } -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m03200_loop (const pw_t *pws, 
const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bcrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32 pw_len = pws[gid].pw_len; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - u32x E[34]; - - expand_key (E, w, pw_len); - - E[ 0] = swap_workaround (E[ 0]); - E[ 1] = swap_workaround (E[ 1]); - E[ 2] = swap_workaround (E[ 2]); - E[ 3] = swap_workaround (E[ 3]); - E[ 4] = swap_workaround (E[ 4]); - E[ 5] = swap_workaround (E[ 5]); - E[ 6] = swap_workaround (E[ 6]); - E[ 7] = swap_workaround (E[ 7]); - E[ 8] = swap_workaround (E[ 8]); - E[ 9] = swap_workaround (E[ 9]); - E[10] = swap_workaround (E[10]); - E[11] = swap_workaround (E[11]); - E[12] = swap_workaround (E[12]); - E[13] = swap_workaround (E[13]); - E[14] = swap_workaround (E[14]); - E[15] = swap_workaround (E[15]); - E[16] = swap_workaround 
(E[16]); - E[17] = swap_workaround (E[17]); - - // load - - u32x P[18]; - - for (u32 i = 0; i < 18; i++) - { - P[i] = tmps[gid].P[i]; - } - - __shared__ u32x S0_all[8][256]; - __shared__ u32x S1_all[8][256]; - __shared__ u32x S2_all[8][256]; - __shared__ u32x S3_all[8][256]; - - u32x *S0 = S0_all[lid]; - u32x *S1 = S1_all[lid]; - u32x *S2 = S2_all[lid]; - u32x *S3 = S3_all[lid]; - - for (u32 i = 0; i < 256; i++) - { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; - } - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * main loop - */ - - u32 tmp; - - u32 L0; - u32 R0; - - for (u32 i = 0; i < loop_cnt; i++) - { - for (u32 i = 0; i < 18; i++) - { - P[i] ^= E[i]; - } - - L0 = 0; - R0 = 0; - - for (u32 i = 0; i < 9; i++) - { - BF_ENCRYPT (L0, R0); - - P[i * 2 + 0] = L0; - P[i * 2 + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S0[i + 0] = L0; - S0[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S1[i + 0] = L0; - S1[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S2[i + 0] = L0; - S2[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S3[i + 0] = L0; - S3[i + 1] = R0; - } - - P[ 0] ^= salt_buf[0]; - P[ 1] ^= salt_buf[1]; - P[ 2] ^= salt_buf[2]; - P[ 3] ^= salt_buf[3]; - P[ 4] ^= salt_buf[0]; - P[ 5] ^= salt_buf[1]; - P[ 6] ^= salt_buf[2]; - P[ 7] ^= salt_buf[3]; - P[ 8] ^= salt_buf[0]; - P[ 9] ^= salt_buf[1]; - P[10] ^= salt_buf[2]; - P[11] ^= salt_buf[3]; - P[12] ^= salt_buf[0]; - P[13] ^= salt_buf[1]; - P[14] ^= salt_buf[2]; - P[15] ^= salt_buf[3]; - P[16] ^= salt_buf[0]; - P[17] ^= salt_buf[1]; - - L0 = 0; - R0 = 0; - - for (u32 i = 0; i < 9; i++) - { - BF_ENCRYPT (L0, 
R0); - - P[i * 2+ 0] = L0; - P[i * 2+ 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S0[i + 0] = L0; - S0[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S1[i + 0] = L0; - S1[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S2[i + 0] = L0; - S2[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 2) - { - BF_ENCRYPT (L0, R0); - - S3[i + 0] = L0; - S3[i + 1] = R0; - } - } - - // store - - for (u32 i = 0; i < 18; i++) - { - tmps[gid].P[i] = P[i]; - } - - for (u32 i = 0; i < 256; i++) - { - tmps[gid].S0[i] = S0[i]; - tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; - } -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m03200_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bcrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - // load - - u32x P[18]; - - for (u32 i = 0; i < 18; i++) - { - P[i] = tmps[gid].P[i]; - } - - __shared__ u32x S0_all[8][256]; - __shared__ u32x S1_all[8][256]; - __shared__ u32x S2_all[8][256]; - __shared__ u32x S3_all[8][256]; - - u32x *S0 = S0_all[lid]; - u32x *S1 = S1_all[lid]; - u32x *S2 = S2_all[lid]; - 
u32x *S3 = S3_all[lid]; - - for (u32 i = 0; i < 256; i++) - { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; - } - - /** - * main - */ - - u32 tmp; - - u32 L0; - u32 R0; - - L0 = BCRYPTM_0; - R0 = BCRYPTM_1; - - for (u32 i = 0; i < 64; i++) - { - BF_ENCRYPT (L0, R0); - } - - const u32x r0 = L0; - const u32x r1 = R0; - - L0 = BCRYPTM_2; - R0 = BCRYPTM_3; - - for (u32 i = 0; i < 64; i++) - { - BF_ENCRYPT (L0, R0); - } - - const u32x r2 = L0; - const u32x r3 = R0; - - /* - e = L0; - f = R0; - - f &= ~0xff; // its just 23 not 24 ! - */ - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m03710_a0.cu b/nv/m03710_a0.cu deleted file mode 100644 index b0a7ffb..0000000 --- a/nv/m03710_a0.cu +++ /dev/null @@ -1,766 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ 
char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 
salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, 
w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - 
MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= 
salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], 
MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m03710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = 
salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); 
- MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, 
w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= 
salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP 
(MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git 
a/nv/m03710_a1.cu b/nv/m03710_a1.cu deleted file mode 100644 index 1fd46c6..0000000 --- a/nv/m03710_a1.cu +++ /dev/null @@ -1,867 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = 
c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, 
a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - 
MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 
255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - 
MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, 
b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset 
(wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == 
COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP 
(MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - 
a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, 
b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s08 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03710_a3.cu b/nv/m03710_a3.cu deleted file mode 100644 index d6a7eaa..0000000 --- a/nv/m03710_a3.cu +++ /dev/null @@ -1,999 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define 
DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m03710m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** 
- * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, 
c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= 
salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, 
w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, 
MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m03710s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - 
salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = 32 + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP 
(MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, 
MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = 0x00000080; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w3_t[2] = pw_salt_len * 8; - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * md5 - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], 
MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - 
MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, 
const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m03710s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m03800_a0.cu b/nv/m03800_a0.cu deleted file mode 100644 index 
c2f7758..0000000 --- a/nv/m03800_a0.cu +++ /dev/null @@ -1,668 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; 
- - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - - /** - * prepend salt - */ - - 
switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + out_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP 
(MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, 
w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x 
w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + out_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], 
MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, 
const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03800_a1.cu b/nv/m03800_a1.cu deleted file mode 100644 index 28fe339..0000000 --- a/nv/m03800_a1.cu +++ /dev/null @@ -1,772 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, 
c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = 
wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * prepend salt - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - 
u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m08 (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - 
*/ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * prepend salt - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 
w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], 
MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - 
MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m03800_a3.cu b/nv/m03800_a3.cu deleted file mode 100644 index dd6f336..0000000 --- a/nv/m03800_a3.cu +++ /dev/null @@ -1,842 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m03800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - 
u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - 
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, 
c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m03800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x 
w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - w3_t[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], 
MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - 
MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, 
const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 
2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m03800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x 
w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m03800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04310_a0.cu b/nv/m04310_a0.cu deleted file mode 100644 index 6e27434..0000000 --- a/nv/m04310_a0.cu +++ /dev/null @@ -1,657 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define 
uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc 
table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP 
(MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP 
(MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, 
d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 
2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, 
a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) 
<< 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, 
d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, 
MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04310_a1.cu b/nv/m04310_a1.cu deleted file mode 100644 index 6d369aa..0000000 --- a/nv/m04310_a1.cu +++ /dev/null @@ -1,759 +0,0 @@ -/** - * Author......: Jens Steube - * 
License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = 
c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP 
(MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, 
w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, 
MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = 
c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP 
(MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP 
(MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, 
a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04310_a3.cu b/nv/m04310_a3.cu deleted file mode 100644 index 31e3e48..0000000 --- a/nv/m04310_a3.cu +++ /dev/null @@ -1,861 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void m04310m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 
+ salt_len) * 8; - s[7] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | 
uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, 
MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m04310s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - 
u32 s[8]; - - s[0] = salt_bufs[salt_pos].salt_buf[0]; - s[1] = salt_bufs[salt_pos].salt_buf[1]; - s[2] = salt_bufs[salt_pos].salt_buf[2]; - s[3] = salt_bufs[salt_pos].salt_buf[3]; - s[4] = salt_bufs[salt_pos].salt_buf[4]; - s[5] = salt_bufs[salt_pos].salt_buf[5]; - s[6] = (32 + salt_len) * 8; - s[7] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, 
b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x w0_t = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - const u32x w7_t = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - const u32 w8_t = s[0]; - const u32 w9_t = s[1]; - const u32 wa_t = s[2]; - const u32 wb_t = s[3]; - const u32 wc_t = s[4]; - const u32 wd_t = s[5]; - const u32 we_t = s[6]; - const u32 wf_t = s[7]; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); 
- MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP 
(MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - 
w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = 
pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - 
w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = 
threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = 
pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04310_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - 
w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04310s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04400_a0.cu b/nv/m04400_a0.cu deleted file mode 100644 index 87e4f76..0000000 --- a/nv/m04400_a0.cu +++ /dev/null @@ -1,733 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len 
= pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP 
(SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, 
c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP 
(SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP 
(SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, 
a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, 
w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - 
pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP 
(SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ 
we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ 
w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ 
w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP 
(MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, 
b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04400_a1.cu b/nv/m04400_a1.cu deleted file mode 100644 index d766a92..0000000 --- a/nv/m04400_a1.cu +++ /dev/null @@ -1,843 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S 
"check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] 
= pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - 
w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = 
rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 
((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = 
rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 
((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, 
MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 
search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = 
swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - 
w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t 
= rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - 
w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; 
- w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, 
b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 
= d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04400_a3.cu b/nv/m04400_a3.cu deleted file mode 100644 index 167a805..0000000 --- a/nv/m04400_a3.cu +++ /dev/null @@ -1,972 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_SHA1_ - -#include 
"include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m04400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ 
wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ 
wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ 
w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 
0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, 
a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m04400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x 
w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 
((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 
((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = 
rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * md5 - */ - - w0_t = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w7_t 
= uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - wa_t = 0x80; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 40 * 8; - wf_t = 0; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, 
d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; 
- const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04400s (w0, w1, w2, w3, 
pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04500_a0.cu b/nv/m04500_a0.cu deleted file mode 100644 index 290f0d8..0000000 --- a/nv/m04500_a0.cu +++ /dev/null @@ -1,801 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m04500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** 
- * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ 
w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ 
wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ 
w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = 
uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, 
d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ 
wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = 
swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 
1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 
1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le 
((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, 
b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, 
d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, 
a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04500_a1.cu b/nv/m04500_a1.cu deleted file mode 100644 index 24efd79..0000000 --- a/nv/m04500_a1.cu +++ /dev/null @@ -1,910 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define 
VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 
2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - 
w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, 
b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 
1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t 
^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - 
| uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = 
rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 
((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - 
#undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include 
VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const 
u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len 
= pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - 
#define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = 
rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = 
rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = 
rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 
0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ 
w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ 
wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m04500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04500_a3.cu b/nv/m04500_a3.cu deleted file mode 100644 index 0bf6062..0000000 --- a/nv/m04500_a3.cu +++ /dev/null @@ -1,1039 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define 
uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m04500m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, 
b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = 
rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = 
rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = 
rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = 
SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ 
we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ 
w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m04500s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] 
= - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); 
SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ 
w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 
((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * 2nd SHA1 - */ - - w0_t = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w1_t = uint_to_hex_lower8_le 
((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w2_t = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w4_t = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w5_t = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - w6_t = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w7_t = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w8_t = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w9_t = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, 
b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, 
w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, 
d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x 
r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04500s (w0, w1, w2, w3, 
pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04700_a0.cu b/nv/m04700_a0.cu deleted file mode 100644 index bc901a1..0000000 --- a/nv/m04700_a0.cu +++ /dev/null @@ -1,707 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#undef _MD5_ -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m04700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pw_len * 8; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 
(w0, w1, out_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, 
b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 
16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 
1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 
((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t 
= rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, 
const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pw_len * 8; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x 
d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , 
b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 
0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t 
^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ 
w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ 
w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04700_a1.cu b/nv/m04700_a1.cu deleted file mode 100644 index 5894c98..0000000 --- a/nv/m04700_a1.cu +++ /dev/null @@ -1,817 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#undef _MD5_ -#include 
"types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = 
pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - 
- w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, 
d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += 
MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t 
= rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 
((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = 
rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = 
d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < 
combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, 
c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d 
>> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = 
rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = 
rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t 
= rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04700_a3.cu b/nv/m04700_a3.cu deleted file mode 100644 index 6cc4d1a..0000000 --- a/nv/m04700_a3.cu +++ /dev/null @@ -1,946 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#undef _MD5_ -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 
-#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m04700m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, 
c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); 
- MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = 
uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 
((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t 
^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 
((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m04700s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, 
u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); 
- MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, 
w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * sha1 - */ - - u32x w0_t = uint_to_hex_lower8_le ((a >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 0) & 255) << 16; - u32x w1_t = uint_to_hex_lower8_le ((a >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 16) & 255) << 16; - u32x w2_t = uint_to_hex_lower8_le ((b >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 0) & 255) << 16; - u32x w3_t = uint_to_hex_lower8_le ((b >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 16) & 255) << 16; - u32x w4_t = uint_to_hex_lower8_le ((c >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 0) & 255) << 16; - u32x w5_t = uint_to_hex_lower8_le ((c >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 16) & 255) << 16; - u32x w6_t = uint_to_hex_lower8_le ((d >> 8) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 0) & 255) << 16; - u32x w7_t = uint_to_hex_lower8_le ((d >> 24) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 16) & 255) << 16; - - u32x w8_t = 0x80000000; - u32x w9_t = 0; - u32x wa_t = 0; - u32x wb_t = 0; - u32x wc_t = 0; - u32x wd_t = 0; - u32x we_t = 0; - u32x wf_t = 32 * 8; - - u32x e; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP 
(SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, 
d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, 
b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, 
b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - - if (e != e_rev) continue; - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - 
w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x 
w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 
8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - 
w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - 
w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x 
w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m04700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04800_a0.cu b/nv/m04800_a0.cu deleted file mode 100644 index bae6df9..0000000 --- a/nv/m04800_a0.cu +++ /dev/null @@ -1,546 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - 
w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32 pw_salt_len = out_len + salt_len; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = 0; - w3[3] = 0; - - /* - * add id byte - */ - - switch_buffer_by_offset (w0, w1, w2, w3, 1); - - w0[0] |= salt_buf[4]; - - w3[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 
salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32 pw_salt_len = out_len + salt_len; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = 0; - w3[3] = 0; - - /* - * add id byte - */ - - switch_buffer_by_offset (w0, w1, w2, w3, 1); - - w0[0] |= salt_buf[4]; - - w3[2] = pw_salt_len * 8; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = 
MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], 
MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04800_a1.cu b/nv/m04800_a1.cu deleted file mode 100644 index 429c2e5..0000000 --- a/nv/m04800_a1.cu +++ /dev/null @@ -1,642 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" 
-#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = 
pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /* - * add id byte - */ - - switch_buffer_by_offset (w0, w1, w2, w3, 1); - - w0[0] |= 
salt_buf[4]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP 
(MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = 
salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /* - * add id byte - */ - - switch_buffer_by_offset (w0, w1, w2, w3, 1); - - w0[0] |= 
salt_buf[4]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - w0[0] |= s0[0]; - w0[1] |= s0[1]; - w0[2] |= s0[2]; - w0[3] |= s0[3]; - w1[0] |= s1[0]; - w1[1] |= s1[1]; - w1[2] |= s1[2]; - w1[3] |= s1[3]; - w2[0] |= s2[0]; - w2[1] |= s2[1]; - w2[2] |= s2[2]; - w2[3] |= s2[3]; - w3[0] |= s3[0]; - w3[1] |= s3[1]; - w3[2] = pw_salt_len * 8; - w3[3] = 0; - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP 
(MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04800_a3.cu b/nv/m04800_a3.cu deleted file mode 100644 index f900b38..0000000 --- a/nv/m04800_a3.cu +++ /dev/null @@ -1,773 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m04800m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - // move w by 1 - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /** - * add id byte - */ - - w0_t[0] |= salt_buf[4]; - - /** - * loop - */ - - u32x w0l = w0_t[0]; - u32x w1l = w0_t[1]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0_t[0] 
= w0l | (w0r << 8); - w0_t[1] = w1l | (w0r >> 24); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], 
MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - 
-__device__ static void m04800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = 0x80; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - // move w by 1 - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - 
w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /** - * add id byte - */ - - w0_t[0] |= salt_buf[4]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0_t[0]; - u32x w1l = w0_t[1]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0_t[0] = w0l | (w0r << 8); - w0_t[1] = w1l | (w0r >> 24); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, 
d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], 
MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - - bool q_cond = (search[0] != a); - - if (q_cond) continue; - - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; 
- w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 
0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 
pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m04900_a0.cu b/nv/m04900_a0.cu deleted file mode 100644 index 2a160ea..0000000 --- a/nv/m04900_a0.cu +++ /dev/null @@ -1,701 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - 
u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0_t[4]; - - w0_t[0] = pw_buf0[0]; - w0_t[1] = pw_buf0[1]; - w0_t[2] = pw_buf0[2]; - w0_t[3] = pw_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = pw_buf1[0]; - w1_t[1] = pw_buf1[1]; - w1_t[2] = pw_buf1[2]; - w1_t[3] = pw_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0_t, w1_t, pw_len); - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] 
|= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + out_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, 
c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = 
rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = 
rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m04900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 
(search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0_t[4]; - - w0_t[0] = pw_buf0[0]; - w0_t[1] = pw_buf0[1]; - w0_t[2] = pw_buf0[2]; - w0_t[3] = pw_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = pw_buf1[0]; - w1_t[1] = pw_buf1[1]; - w1_t[2] = pw_buf1[2]; - w1_t[3] = pw_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0_t, w1_t, pw_len); - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + out_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + out_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - 
u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 
1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP 
(SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - - if (e != e_rev) continue; - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const 
u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04900_a1.cu b/nv/m04900_a1.cu deleted file mode 100644 index 74f6ae1..0000000 --- a/nv/m04900_a1.cu +++ /dev/null @@ -1,812 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - 
*/ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0_t[4]; - - w0_t[0] = wordl0[0] | wordr0[0]; - w0_t[1] = wordl0[1] | wordr0[1]; - w0_t[2] = wordl0[2] | wordr0[2]; - w0_t[3] = wordl0[3] | wordr0[3]; - - u32x w1_t[4]; - - w1_t[0] = wordl1[0] | wordr1[0]; - w1_t[1] = wordl1[1] | wordr1[1]; - w1_t[2] = wordl1[2] | wordr1[2]; - w1_t[3] = wordl1[3] | wordr1[3]; - - u32x w2_t[4]; - - w2_t[0] = wordl2[0] | wordr2[0]; - w2_t[1] = wordl2[1] | wordr2[1]; - w2_t[2] = wordl2[2] | wordr2[2]; - w2_t[3] = wordl2[3] | wordr2[3]; - - u32x w3_t[4]; - - w3_t[0] = wordl3[0] | wordr3[0]; - w3_t[1] = wordl3[1] | wordr3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = 
salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - 
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - 
w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0_t[4]; - - w0_t[0] = wordl0[0] | wordr0[0]; - w0_t[1] = wordl0[1] | wordr0[1]; - w0_t[2] = wordl0[2] | wordr0[2]; - w0_t[3] = wordl0[3] | wordr0[3]; - - u32x w1_t[4]; - - w1_t[0] = wordl1[0] | wordr1[0]; - w1_t[1] = wordl1[1] | wordr1[1]; - w1_t[2] = wordl1[2] | wordr1[2]; - w1_t[3] = wordl1[3] | wordr1[3]; - - u32x w2_t[4]; - - w2_t[0] = wordl2[0] | wordr2[0]; - w2_t[1] = wordl2[1] | wordr2[1]; - w2_t[2] = wordl2[2] | wordr2[2]; - w2_t[3] = wordl2[3] | wordr2[3]; - - u32x w3_t[4]; - - w3_t[0] = wordl3[0] | wordr3[0]; - w3_t[1] = wordl3[1] | wordr3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - /** - * prepend salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= 
salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - w3_t[3] |= salt_buf3[3]; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, salt_len + pw_len); - - w0_t[0] |= s0[0]; - w0_t[1] |= s0[1]; - w0_t[2] |= s0[2]; - w0_t[3] |= s0[3]; - w1_t[0] |= s1[0]; - w1_t[1] |= s1[1]; - w1_t[2] |= s1[2]; - w1_t[3] |= s1[3]; - w2_t[0] |= s2[0]; - w2_t[1] |= s2[1]; - w2_t[2] |= s2[2]; - w2_t[3] |= s2[3]; - w3_t[0] |= s3[0]; - w3_t[1] |= s3[1]; - w3_t[2] |= s3[2]; - w3_t[3] |= s3[3]; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len); - - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = 0; - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, 
a, b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 
((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 
((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - - if (e != e_rev) continue; - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const 
comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m04900_a3.cu b/nv/m04900_a3.cu deleted file mode 100644 index 14fd0ad..0000000 --- a/nv/m04900_a3.cu +++ /dev/null @@ -1,936 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define 
DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m04900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0_t[4]; - - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1_t[4]; - - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2_t[4]; - - 
salt_buf2_t[0] = 0; - salt_buf2_t[1] = 0; - salt_buf2_t[2] = 0; - salt_buf2_t[3] = 0; - - u32 salt_buf3_t[4]; - - salt_buf3_t[0] = 0; - salt_buf3_t[1] = 0; - salt_buf3_t[2] = 0; - salt_buf3_t[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - // first we need to switch the right-hand salt to the correct position (2nd salt) - - switch_buffer_by_offset (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len + pw_len); - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - // concatenate the 1st and 2nd instance of the salt - - salt_buf0[0] |= salt_buf0_t[0]; - salt_buf0[1] |= salt_buf0_t[1]; - salt_buf0[2] |= salt_buf0_t[2]; - salt_buf0[3] |= salt_buf0_t[3]; - - salt_buf1[0] |= salt_buf1_t[0]; - salt_buf1[1] |= salt_buf1_t[1]; - salt_buf1[2] |= salt_buf1_t[2]; - salt_buf1[3] |= salt_buf1_t[3]; - - salt_buf2[0] |= salt_buf2_t[0]; - salt_buf2[1] |= salt_buf2_t[1]; - salt_buf2[2] |= salt_buf2_t[2]; - salt_buf2[3] |= salt_buf2_t[3]; - - salt_buf3[0] |= salt_buf3_t[0]; - salt_buf3[1] |= salt_buf3_t[1]; - salt_buf3[2] |= salt_buf3_t[2]; - salt_buf3[3] |= salt_buf3_t[3]; - - append_0x80_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_salt_len); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; 
- - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - /** - * put the password after the first salt but before the second salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = swap_workaround (w3_t[2]); - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, a, 
b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 ((w0 ^ 
wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 ((w8 ^ 
w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m04900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * salt - */ - - u32 salt_buf0_t[4]; - - salt_buf0_t[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0_t[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0_t[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0_t[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1_t[4]; - - salt_buf1_t[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1_t[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1_t[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1_t[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2_t[4]; - - salt_buf2_t[0] = 0; - salt_buf2_t[1] = 0; - salt_buf2_t[2] = 0; - salt_buf2_t[3] = 0; - - u32 salt_buf3_t[4]; - - salt_buf3_t[0] = 0; - salt_buf3_t[1] = 0; - salt_buf3_t[2] = 0; - salt_buf3_t[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = salt_len + pw_len + salt_len; - - // first we need to switch the right-hand salt to the 
correct position (2nd salt) - - switch_buffer_by_offset (salt_buf0_t, salt_buf1_t, salt_buf2_t, salt_buf3_t, salt_len + pw_len); - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - // concatenate the 1st and 2nd instance of the salt - - salt_buf0[0] |= salt_buf0_t[0]; - salt_buf0[1] |= salt_buf0_t[1]; - salt_buf0[2] |= salt_buf0_t[2]; - salt_buf0[3] |= salt_buf0_t[3]; - - salt_buf1[0] |= salt_buf1_t[0]; - salt_buf1[1] |= salt_buf1_t[1]; - salt_buf1[2] |= salt_buf1_t[2]; - salt_buf1[3] |= salt_buf1_t[3]; - - salt_buf2[0] |= salt_buf2_t[0]; - salt_buf2[1] |= salt_buf2_t[1]; - salt_buf2[2] |= salt_buf2_t[2]; - salt_buf2[3] |= salt_buf2_t[3]; - - salt_buf3[0] |= salt_buf3_t[0]; - salt_buf3[1] |= salt_buf3_t[1]; - salt_buf3[2] |= salt_buf3_t[2]; - salt_buf3[3] |= salt_buf3_t[3]; - - append_0x80_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_salt_len); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - 
w3_t[3] = w3[3]; - - /** - * put the password after the first salt but before the second salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] |= salt_buf0[0]; - w0_t[1] |= salt_buf0[1]; - w0_t[2] |= salt_buf0[2]; - w0_t[3] |= salt_buf0[3]; - w1_t[0] |= salt_buf1[0]; - w1_t[1] |= salt_buf1[1]; - w1_t[2] |= salt_buf1[2]; - w1_t[3] |= salt_buf1[3]; - w2_t[0] |= salt_buf2[0]; - w2_t[1] |= salt_buf2[1]; - w2_t[2] |= salt_buf2[2]; - w2_t[3] |= salt_buf2[3]; - w3_t[0] |= salt_buf3[0]; - w3_t[1] |= salt_buf3[1]; - w3_t[2] |= salt_buf3[2]; - - u32x w0 = swap_workaround (w0_t[0]); - u32x w1 = swap_workaround (w0_t[1]); - u32x w2 = swap_workaround (w0_t[2]); - u32x w3 = swap_workaround (w0_t[3]); - u32x w4 = swap_workaround (w1_t[0]); - u32x w5 = swap_workaround (w1_t[1]); - u32x w6 = swap_workaround (w1_t[2]); - u32x w7 = swap_workaround (w1_t[3]); - u32x w8 = swap_workaround (w2_t[0]); - u32x w9 = swap_workaround (w2_t[1]); - u32x wa = swap_workaround (w2_t[2]); - u32x wb = swap_workaround (w2_t[3]); - u32x wc = swap_workaround (w3_t[0]); - u32x wd = swap_workaround (w3_t[1]); - u32x we = swap_workaround (w3_t[2]); - u32x wf = pw_salt_len * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we); - 
SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3); - - #undef K - #define K SHA1C01 - - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP 
(SHA1_F1, b, c, d, e, a, w7); - - #undef K - #define K SHA1C02 - - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb); - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb); - - #undef K - #define K SHA1C03 - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we); - wf = rotl32 
((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf); - w0 = rotl32 ((wd ^ w8 ^ w2 ^ w0), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0); - w1 = rotl32 ((we ^ w9 ^ w3 ^ w1), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1); - w2 = rotl32 ((wf ^ wa ^ w4 ^ w2), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2); - w3 = rotl32 ((w0 ^ wb ^ w5 ^ w3), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3); - w4 = rotl32 ((w1 ^ wc ^ w6 ^ w4), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4); - w5 = rotl32 ((w2 ^ wd ^ w7 ^ w5), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5); - w6 = rotl32 ((w3 ^ we ^ w8 ^ w6), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6); - w7 = rotl32 ((w4 ^ wf ^ w9 ^ w7), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7); - w8 = rotl32 ((w5 ^ w0 ^ wa ^ w8), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8); - w9 = rotl32 ((w6 ^ w1 ^ wb ^ w9), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9); - wa = rotl32 ((w7 ^ w2 ^ wc ^ wa), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa); - wb = rotl32 ((w8 ^ w3 ^ wd ^ wb), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb); - - if (e != e_rev) continue; - - wc = rotl32 ((w9 ^ w4 ^ we ^ wc), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc); - wd = rotl32 ((wa ^ w5 ^ wf ^ wd), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd); - we = rotl32 ((wb ^ w6 ^ w0 ^ we), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we); - wf = rotl32 ((wc ^ w7 ^ w1 ^ wf), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, 
const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 
gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) 
return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = 
pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m04900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - 
- u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m04900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m05000_a0.cu b/nv/m05000_a0.cu deleted file mode 100644 index 4230993..0000000 --- a/nv/m05000_a0.cu +++ /dev/null @@ -1,528 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _KECCAK_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 2 -#define DGST_R1 3 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -/** - * constants - */ - -#ifndef KECCAK_ROUNDS -#define KECCAK_ROUNDS 24 -#endif - -#define Theta1(s) (st[0 + s] ^ st[5 + s] ^ st[10 + s] ^ st[15 + s] ^ st[20 + s]) - -#define Theta2(s) \ -{ \ - st[ 0 + s] ^= t; \ - st[ 5 + s] ^= t; \ - st[10 + s] ^= t; \ - st[15 + s] ^= t; \ - st[20 + s] ^= t; \ -} - -#define Rho_Pi(s) \ -{ \ - u32 j = keccakf_piln[s]; \ - u32 k = keccakf_rotc[s]; \ - bc0 = st[j]; \ - st[j] = rotl64 (t, k); \ - t = bc0; \ -} - -#define Chi(s) \ -{ \ - bc0 = st[0 + s]; \ - bc1 = st[1 
+ s]; \ - bc2 = st[2 + s]; \ - bc3 = st[3 + s]; \ - bc4 = st[4 + s]; \ - st[0 + s] ^= ~bc1 & bc2; \ - st[1 + s] ^= ~bc2 & bc3; \ - st[2 + s] ^= ~bc3 & bc4; \ - st[3 + s] ^= ~bc4 & bc0; \ - st[4 + s] ^= ~bc0 & bc1; \ -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 
keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x01_2 (w0, w1, out_len); - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = 0; - st[ 5] = 0; - st[ 6] = 0; - st[ 7] = 0; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 
^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi (17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 
0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x01_2 (w0, w1, out_len); - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = 0; - st[ 5] = 0; - st[ 6] = 0; - st[ 7] = 0; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - 
st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi (17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05000_a1.cu b/nv/m05000_a1.cu deleted file mode 100644 index 77c5bc7..0000000 --- a/nv/m05000_a1.cu +++ /dev/null @@ -1,634 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _KECCAK_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 2 -#define DGST_R1 3 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifndef KECCAK_ROUNDS -#define KECCAK_ROUNDS 24 -#endif - -#define Theta1(s) (st[0 + s] ^ st[5 + s] ^ st[10 + s] ^ st[15 + s] ^ st[20 + s]) - -#define 
Theta2(s) \ -{ \ - st[ 0 + s] ^= t; \ - st[ 5 + s] ^= t; \ - st[10 + s] ^= t; \ - st[15 + s] ^= t; \ - st[20 + s] ^= t; \ -} - -#define Rho_Pi(s) \ -{ \ - u32 j = keccakf_piln[s]; \ - u32 k = keccakf_rotc[s]; \ - bc0 = st[j]; \ - st[j] = rotl64 (t, k); \ - t = bc0; \ -} - -#define Chi(s) \ -{ \ - bc0 = st[0 + s]; \ - bc1 = st[1 + s]; \ - bc2 = st[2 + s]; \ - bc3 = st[3 + s]; \ - bc4 = st[4 + s]; \ - st[0 + s] ^= ~bc1 & bc2; \ - st[1 + s] ^= ~bc2 & bc3; \ - st[2 + s] ^= ~bc3 & bc4; \ - st[3 + s] ^= ~bc4 & bc0; \ - st[4 + s] ^= ~bc0 & bc1; \ -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 
0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x01_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if 
(combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x01_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = 0; - st[ 5] = 0; - st[ 6] = 0; - st[ 7] = 0; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi 
(17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 
25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x01_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x01_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | 
wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = 0; - st[ 5] = 0; - st[ 6] = 0; - st[ 7] = 0; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi (17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x 
r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05000_a3.cu b/nv/m05000_a3.cu deleted file mode 100644 index 956c424..0000000 --- a/nv/m05000_a3.cu +++ /dev/null @@ -1,695 +0,0 @@ -/** - * Author......: Jens Steube - * 
License.....: MIT - */ - -#define _KECCAK_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 2 -#define DGST_R1 3 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifndef KECCAK_ROUNDS -#define KECCAK_ROUNDS 24 -#endif - -#define Theta1(s) (st[0 + s] ^ st[5 + s] ^ st[10 + s] ^ st[15 + s] ^ st[20 + s]) - -#define Theta2(s) \ -{ \ - st[ 0 + s] ^= t; \ - st[ 5 + s] ^= t; \ - st[10 + s] ^= t; \ - st[15 + s] ^= t; \ - st[20 + s] ^= t; \ -} - -#define Rho_Pi(s) \ -{ \ - u32 j = keccakf_piln[s]; \ - u32 k = keccakf_rotc[s]; \ - bc0 = st[j]; \ - st[j] = rotl64 (t, k); \ - t = bc0; \ -} - -#define Chi(s) \ -{ \ - bc0 = st[0 + s]; \ - bc1 = st[1 + s]; \ - bc2 = st[2 + s]; \ - bc3 = st[3 + s]; \ - bc4 = st[4 + s]; \ - st[0 + s] ^= ~bc1 & bc2; \ - st[1 + s] ^= ~bc2 & bc3; \ - st[2 + s] ^= ~bc3 & bc4; \ - st[3 + s] ^= ~bc4 & bc0; \ - st[4 + s] ^= ~bc0 & bc1; \ -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m05000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | (u64x) (w1[3]) << 32; - st[ 4] = (u64x) (w2[0]) | (u64x) (w2[1]) << 32; - st[ 5] = (u64x) (w2[2]) | (u64x) (w2[3]) << 32; - st[ 6] = (u64x) (w3[0]) | (u64x) (w3[1]) << 32; - st[ 7] = 
(u64x) (w3[2]) | (u64x) (w3[3]) << 32; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi (17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m05000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * constants - */ - - const u64 keccakf_rndc[24] = - { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 - }; - - const u32 keccakf_rotc[24] = - { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, - 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 - }; - - const u32 keccakf_piln[24] = - { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, - 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 - }; - - /** - * 0x80 keccak, very special - */ - - const u32 mdlen = salt_bufs[salt_pos].keccak_mdlen; - - const u32 rsiz = 200 - (2 * mdlen); - - const u32 add80w = (rsiz - 1) / 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u64x st[25]; - - st[ 0] = (u64x) (w0[0]) | (u64x) (w0[1]) << 32; - st[ 1] = (u64x) (w0[2]) | (u64x) (w0[3]) << 32; - st[ 2] = (u64x) (w1[0]) | (u64x) (w1[1]) << 32; - st[ 3] = (u64x) (w1[2]) | 
(u64x) (w1[3]) << 32; - st[ 4] = (u64x) (w2[0]) | (u64x) (w2[1]) << 32; - st[ 5] = (u64x) (w2[2]) | (u64x) (w2[3]) << 32; - st[ 6] = (u64x) (w3[0]) | (u64x) (w3[1]) << 32; - st[ 7] = (u64x) (w3[2]) | (u64x) (w3[3]) << 32; - st[ 8] = 0; - st[ 9] = 0; - st[10] = 0; - st[11] = 0; - st[12] = 0; - st[13] = 0; - st[14] = 0; - st[15] = 0; - st[16] = 0; - st[17] = 0; - st[18] = 0; - st[19] = 0; - st[20] = 0; - st[21] = 0; - st[22] = 0; - st[23] = 0; - st[24] = 0; - - st[add80w] |= 0x8000000000000000; - - int round; - - for (round = 0; round < KECCAK_ROUNDS; round++) - { - // Theta - - u64x bc0 = Theta1 (0); - u64x bc1 = Theta1 (1); - u64x bc2 = Theta1 (2); - u64x bc3 = Theta1 (3); - u64x bc4 = Theta1 (4); - - u64x t; - - t = bc4 ^ rotl64 (bc1, 1); Theta2 (0); - t = bc0 ^ rotl64 (bc2, 1); Theta2 (1); - t = bc1 ^ rotl64 (bc3, 1); Theta2 (2); - t = bc2 ^ rotl64 (bc4, 1); Theta2 (3); - t = bc3 ^ rotl64 (bc0, 1); Theta2 (4); - - // Rho Pi - - t = st[1]; - - Rho_Pi (0); - Rho_Pi (1); - Rho_Pi (2); - Rho_Pi (3); - Rho_Pi (4); - Rho_Pi (5); - Rho_Pi (6); - Rho_Pi (7); - Rho_Pi (8); - Rho_Pi (9); - Rho_Pi (10); - Rho_Pi (11); - Rho_Pi (12); - Rho_Pi (13); - Rho_Pi (14); - Rho_Pi (15); - Rho_Pi (16); - Rho_Pi (17); - Rho_Pi (18); - Rho_Pi (19); - Rho_Pi (20); - Rho_Pi (21); - Rho_Pi (22); - Rho_Pi (23); - - // Chi - - Chi (0); - Chi (5); - Chi (10); - Chi (15); - Chi (20); - - // Iota - - st[0] ^= keccakf_rndc[round]; - } - - const u32x r0 = l32_from_64 (st[1]); - const u32x r1 = h32_from_64 (st[1]); - const u32x r2 = l32_from_64 (st[2]); - const u32x r3 = h32_from_64 (st[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, 
const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - 
w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - 
w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m05100_a0.cu b/nv/m05100_a0.cu deleted file mode 100644 index 4cf8eb1..0000000 --- a/nv/m05100_a0.cu +++ /dev/null @@ -1,431 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5H_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, 
MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, 
b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - w3[2] = out_len * 8; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, 
w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP 
(MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, 
const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05100_a1.cu b/nv/m05100_a1.cu deleted file mode 100644 index 51af13a..0000000 --- a/nv/m05100_a1.cu +++ /dev/null @@ -1,533 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5H_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, 
wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, 
MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, 
c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = 
wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, 
w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP 
(MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05100_a3.cu b/nv/m05100_a3.cu deleted file mode 100644 index 2edec9f..0000000 --- a/nv/m05100_a3.cu +++ /dev/null @@ -1,605 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5H_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m05100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - w3[2] = pw_len * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - 
MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, 
w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -__device__ static void m05100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - w3[2] = pw_len * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x a = MD5M_A; - u32x 
b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], 
MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - { - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = b; - const u32x r1 = c; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - - { - const u32x r0 = c; - const u32x r1 = d; - 
const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m05100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m05200.cu b/nv/m05200.cu deleted file mode 100644 index 3938846..0000000 --- a/nv/m05200.cu +++ /dev/null @@ -1,387 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha256_64 (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) 
-{ - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + 
SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, 
a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; 
SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - 
wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05200_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - 
salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] |= salt_buf3[3]; - - const u32 block_len = pw_len + salt_len; - - append_0x80_4 (w0, w1, w2, w3, block_len); - - /** - * init - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = block_len * 8; - - /** - * main - */ - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] 
= SHA256M_H; - - sha256_64 (w0, w1, w2, w3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; - tmps[gid].digest_buf[5] = digest[5]; - tmps[gid].digest_buf[6] = digest[6]; - tmps[gid].digest_buf[7] = digest[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05200_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x digest[8]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - digest[4] = tmps[gid].digest_buf[4]; - digest[5] = tmps[gid].digest_buf[5]; - digest[6] = tmps[gid].digest_buf[6]; - digest[7] = tmps[gid].digest_buf[7]; - - for (u32 i = 0; i < loop_cnt; i++) - { - u32x w0[4]; - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - - u32x w1[4]; - - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - - u32x w2[4]; - - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - 
w3[2] = 0; - w3[3] = 32 * 8; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_64 (w0, w1, w2, w3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; - tmps[gid].digest_buf[5] = digest[5]; - tmps[gid].digest_buf[6] = digest[6]; - tmps[gid].digest_buf[7] = digest[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05200_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m05300_a0.cu b/nv/m05300_a0.cu deleted file mode 100644 index e5fe228..0000000 --- a/nv/m05300_a0.cu +++ /dev/null @@ -1,748 +0,0 @@ 
-/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, 
wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, 
MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = 
w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - 
const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - 
const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - 
- md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 
1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x 
w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - 
w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const 
u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05300_a1.cu b/nv/m05300_a1.cu deleted file mode 100644 index 57c011f..0000000 --- a/nv/m05300_a1.cu +++ /dev/null @@ -1,854 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, 
w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, 
MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 
0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - 
__syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = 
salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run 
(w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = 
ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | 
wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = 
s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s16 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05300_a3.cu b/nv/m05300_a3.cu deleted file mode 100644 index 2e3fc2e..0000000 --- a/nv/m05300_a3.cu +++ /dev/null @@ -1,976 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x 
w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, 
MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; 
- w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m05300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, 
const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_msg_buf[128]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - 
/** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; - - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, 
ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m05300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_msg_buf[128]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = ikepsk_bufs[salt_pos].nr_buf[ 0]; - salt_buf0[1] = ikepsk_bufs[salt_pos].nr_buf[ 1]; - salt_buf0[2] = ikepsk_bufs[salt_pos].nr_buf[ 2]; - 
salt_buf0[3] = ikepsk_bufs[salt_pos].nr_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = ikepsk_bufs[salt_pos].nr_buf[ 4]; - salt_buf1[1] = ikepsk_bufs[salt_pos].nr_buf[ 5]; - salt_buf1[2] = ikepsk_bufs[salt_pos].nr_buf[ 6]; - salt_buf1[3] = ikepsk_bufs[salt_pos].nr_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = ikepsk_bufs[salt_pos].nr_buf[ 8]; - salt_buf2[1] = ikepsk_bufs[salt_pos].nr_buf[ 9]; - salt_buf2[2] = ikepsk_bufs[salt_pos].nr_buf[10]; - salt_buf2[3] = ikepsk_bufs[salt_pos].nr_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = ikepsk_bufs[salt_pos].nr_buf[12]; - salt_buf3[1] = ikepsk_bufs[salt_pos].nr_buf[13]; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = (64 + nr_len) * 8; - w3_t[3] = 0; 
- - u32x digest[4]; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = (64 + msg_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, 
void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const 
void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m05300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05300_s08 (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m05300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = ikepsk_bufs[salt_pos].msg_buf[lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} diff --git a/nv/m05400_a0.cu b/nv/m05400_a0.cu deleted file mode 100644 index 40528fc..0000000 --- a/nv/m05400_a0.cu +++ /dev/null @@ -1,782 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, 
A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 
1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ 
w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ 
wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} 
- -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - 
pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = 
apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; 
- w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const 
u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = 
ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; 
- w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - 
w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05400_a1.cu b/nv/m05400_a1.cu deleted file mode 100644 index 81a70c7..0000000 --- a/nv/m05400_a1.cu +++ /dev/null @@ -1,888 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x 
w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); 
- wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - 
w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - 
w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = 
w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround 
(ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = 
swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = 
s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 nr_len = 
ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - 
u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = 
salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s08 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05400_a3.cu b/nv/m05400_a3.cu deleted file mode 100644 index ea9ada9..0000000 --- a/nv/m05400_a3.cu +++ /dev/null @@ -1,1010 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 
3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ 
we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 
((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - 
digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; 
- w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m05400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_msg_buf[128]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround 
(ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, 
off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m05400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_msg_buf[128]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 nr_len = ikepsk_bufs[salt_pos].nr_len; - const u32 msg_len = ikepsk_bufs[salt_pos].msg_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 0]); - salt_buf0[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 1]); - salt_buf0[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 2]); - salt_buf0[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 4]); - salt_buf1[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 5]); - salt_buf1[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 6]); - salt_buf1[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 8]); - salt_buf2[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[ 9]); - salt_buf2[2] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[10]); - salt_buf2[3] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[11]); - - u32 salt_buf3[4]; - - salt_buf3[0] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[12]); - salt_buf3[1] = swap_workaround (ikepsk_bufs[salt_pos].nr_buf[13]); - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - 
- w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - w3_t[2] = 0; - w3_t[3] = (64 + nr_len) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = ikepsk_bufs[salt_pos].msg_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = s_msg_buf[off + 14]; - w3_t[3] = s_msg_buf[off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = 
s_msg_buf[off + 0]; - w0_t[1] = s_msg_buf[off + 1]; - w0_t[2] = s_msg_buf[off + 2]; - w0_t[3] = s_msg_buf[off + 3]; - w1_t[0] = s_msg_buf[off + 4]; - w1_t[1] = s_msg_buf[off + 5]; - w1_t[2] = s_msg_buf[off + 6]; - w1_t[3] = s_msg_buf[off + 7]; - w2_t[0] = s_msg_buf[off + 8]; - w2_t[1] = s_msg_buf[off + 9]; - w2_t[2] = s_msg_buf[off + 10]; - w2_t[3] = s_msg_buf[off + 11]; - w3_t[0] = s_msg_buf[off + 12]; - w3_t[1] = s_msg_buf[off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + msg_len) * 8; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len 
= pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 
0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = 
pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - 
w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - 
w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const ikepsk_t *ikepsk_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 
3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * s_msg - */ - - __shared__ u32 s_msg_buf[128]; - - if (lid < 128) - { - s_msg_buf[lid] = swap_workaround (ikepsk_bufs[salt_pos].msg_buf[lid]); - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, ikepsk_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_msg_buf); -} diff --git a/nv/m05500_a0.cu b/nv/m05500_a0.cu deleted file mode 100644 index 0c8302a..0000000 --- a/nv/m05500_a0.cu +++ /dev/null @@ -1,959 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt 
^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 
0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 
0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 
0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, 
- 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 
0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 
0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt 
(u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - - 
#if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) -{ - u32x t[8]; - - t[0] = (w0 >> 0) & 0xff; - t[1] = (w0 >> 8) & 0xff; - t[2] = (w0 >> 16) & 0xff; - t[3] = (w0 >> 24) & 0xff; - t[4] = (w1 >> 0) & 0xff; - t[5] = (w1 >> 8) & 0xff; - t[6] = (w1 >> 16) & 0xff; - t[7] = (w1 >> 24) & 0xff; - - u32x k[8]; - - k[0] = (t[0] >> 0); - k[1] = (t[0] << 7) | (t[1] >> 1); - k[2] = (t[1] << 6) | (t[2] >> 2); - k[3] = (t[2] << 5) | (t[3] >> 3); - k[4] = (t[3] << 4) | (t[4] >> 4); - k[5] = (t[4] << 3) | (t[5] >> 5); - k[6] = (t[5] << 2) | (t[6] >> 6); - k[7] = (t[6] << 1); - - out[0] = ((k[0] & 0xff) << 0) - | ((k[1] & 0xff) << 8) - | ((k[2] & 0xff) << 16) - | ((k[3] & 0xff) << 24); - - out[1] = ((k[4] & 0xff) << 0) - | ((k[5] & 0xff) << 8) - | ((k[6] & 0xff) << 16) - | ((k[7] & 0xff) << 24); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, 
out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - 
MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); - - /** - * DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, 
MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); - - /** - * 
DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05500_a1.cu b/nv/m05500_a1.cu deleted file mode 100644 index a913468..0000000 --- a/nv/m05500_a1.cu +++ /dev/null @@ -1,1065 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 
0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 
0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 
0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 
0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 
0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 
0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, 
- 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d 
& 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) -{ - u32x t[8]; - - t[0] = (w0 >> 0) & 0xff; - t[1] = (w0 >> 8) & 0xff; - t[2] = (w0 >> 16) & 0xff; - t[3] = (w0 >> 24) & 0xff; - t[4] = (w1 >> 0) & 0xff; - t[5] = (w1 >> 8) & 0xff; - t[6] = (w1 >> 16) & 0xff; - t[7] = (w1 >> 24) & 0xff; - - u32x k[8]; - - k[0] = (t[0] >> 0); - k[1] = (t[0] << 7) | (t[1] >> 1); - k[2] = (t[1] << 6) | (t[2] >> 2); - k[3] = (t[2] << 5) | (t[3] >> 3); - k[4] = (t[3] << 4) | (t[4] >> 4); - k[5] = (t[4] << 3) | (t[5] >> 5); - k[6] = (t[5] << 2) | (t[6] >> 6); - k[7] = (t[6] << 1); - - out[0] = ((k[0] & 0xff) << 0) - | ((k[1] & 0xff) << 8) - | ((k[2] & 0xff) << 16) - | ((k[3] & 0xff) << 24); - - out[1] = ((k[4] & 0xff) << 0) - | ((k[5] & 0xff) 
<< 8) - | ((k[6] & 0xff) << 16) - | ((k[7] & 0xff) << 24); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = 
c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - 
w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], 
MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); - - /** - * DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = 
salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0_t[1], MD4C00, 
MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2_t[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3_t[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3_t[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3_t[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0_t[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1_t[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2_t[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3_t[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[0], MD4C02, MD4S23); - MD4_STEP 
(MD4_H , a, b, c, d, w0_t[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1_t[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0_t[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2_t[3], MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w1_t[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3_t[3], MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); - - /** - * DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, 
u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05500_a3.cu b/nv/m05500_a3.cu deleted file mode 100644 index 5290143..0000000 --- a/nv/m05500_a3.cu +++ /dev/null @@ -1,1077 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD4_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - 
tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 
0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 
0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 
0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 
0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, 
- 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 
0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static void 
_des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - l ^= BOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | BOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | BOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | BOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | BOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | BOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | BOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | BOX (((t >> 26) & 0x3f), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s = BOX ((( c >> 0) & 0x3f), 0, s_skb) - | BOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | BOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | BOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - u32x t = BOX ((( d >> 0) & 0x3f), 4, s_skb) - | BOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | BOX ((((d >> 15) & 0x3f)), 6, s_skb) - | BOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 
0x30)), 7, s_skb); - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_netntlmv1_key (const u32x w0, const u32x w1, u32x out[2]) -{ - u32x t[8]; - - t[0] = (w0 >> 0) & 0xff; - t[1] = (w0 >> 8) & 0xff; - t[2] = (w0 >> 16) & 0xff; - t[3] = (w0 >> 24) & 0xff; - t[4] = (w1 >> 0) & 0xff; - t[5] = (w1 >> 8) & 0xff; - t[6] = (w1 >> 16) & 0xff; - t[7] = (w1 >> 24) & 0xff; - - u32x k[8]; - - k[0] = (t[0] >> 0); - k[1] = (t[0] << 7) | (t[1] >> 1); - k[2] = (t[1] << 6) | (t[2] >> 2); - k[3] = (t[2] << 5) | (t[3] >> 3); - k[4] = (t[3] << 4) | (t[4] >> 4); - k[5] = (t[4] << 3) | (t[5] >> 5); - k[6] = (t[5] << 2) | (t[6] >> 6); - k[7] = (t[6] << 1); - - out[0] = ((k[0] & 0xff) << 0) - | ((k[1] & 0xff) << 8) - | ((k[2] & 0xff) << 16) - | ((k[3] & 0xff) << 24); - - out[1] = ((k[4] & 0xff) << 0) - | ((k[5] & 0xff) << 8) - | ((k[6] & 0xff) << 16) - | ((k[7] & 0xff) << 24); -} - -__device__ __shared__ u32 s_skb[8][64]; -__device__ __shared__ u32 s_SPtrans[8][64]; - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m05500m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, 
const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - #define w0_t w0 - #define w1_t w[ 1] - #define w2_t w[ 2] - #define w3_t w[ 3] - #define w4_t w[ 4] - #define w5_t w[ 5] - #define w6_t w[ 6] - #define w7_t w[ 7] - #define w8_t w[ 8] - #define w9_t w[ 9] - #define wa_t w[10] - #define wb_t w[11] - #define wc_t w[12] - #define wd_t w[13] - #define we_t w[14] - #define wf_t w[15] - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w4_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w5_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w6_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w7_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w8_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w9_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, wa_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wb_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, wc_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, wd_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, we_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wf_t, MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, 
b, c, w4_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w8_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wc_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w1_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w5_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w9_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wd_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w2_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w6_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wa_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, we_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w3_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w7_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wb_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wf_t, MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w8_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w4_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wc_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w2_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wa_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w6_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, we_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w1_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w9_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w5_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wd_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w3_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wb_t, MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w7_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wf_t, MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt 
(iv1, data, Kc, Kd, s_SPtrans); - - /** - * DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m05500s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 s0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 s1 = salt_bufs[salt_pos].salt_buf[1]; - const u32 s2 = salt_bufs[salt_pos].salt_buf[2]; - - u32x data[2]; - - data[0] = s0; - data[1] = s1; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = 
c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD4M_A; - u32x b = MD4M_B; - u32x c = MD4M_C; - u32x d = MD4M_D; - - #define w0_t w0 - #define w1_t w[ 1] - #define w2_t w[ 2] - #define w3_t w[ 3] - #define w4_t w[ 4] - #define w5_t w[ 5] - #define w6_t w[ 6] - #define w7_t w[ 7] - #define w8_t w[ 8] - #define w9_t w[ 9] - #define wa_t w[10] - #define wb_t w[11] - #define wc_t w[12] - #define wd_t w[13] - #define we_t w[14] - #define wf_t w[15] - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w4_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w5_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w6_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w7_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w8_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w9_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, wa_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wb_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, wc_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, wd_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, we_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wf_t, MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w4_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w8_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wc_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w1_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w5_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w9_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wd_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w2_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w6_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wa_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, we_t, MD4C01, 
MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w3_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w7_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wb_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wf_t, MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w8_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w4_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wc_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w2_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wa_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w6_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, we_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w1_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w9_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w5_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wd_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w3_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wb_t, MD4C02, MD4S21); - - if (s2 != ((d + MD4M_D) >> 16)) continue; - - MD4_STEP (MD4_H , c, d, a, b, w7_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wf_t, MD4C02, MD4S23); - - a += MD4M_A; - b += MD4M_B; - c += MD4M_C; - d += MD4M_D; - - /** - * DES1 - */ - - u32x key[2]; - - transform_netntlmv1_key (a, b, key); - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv1[2]; - - _des_crypt_encrypt (iv1, data, Kc, Kd, s_SPtrans); - - /** - * DES2 - */ - - transform_netntlmv1_key (((b >> 24) | (c << 8)), ((c >> 24) | (d << 8)), key); - - _des_crypt_keysetup (key[0], key[1], Kc, Kd, s_skb); - - u32x iv2[2]; - - _des_crypt_encrypt (iv2, data, Kc, Kd, s_SPtrans); - - /** - * compare - */ - - const u32x r0 = iv1[0]; - const u32x r1 = iv1[1]; - const u32x r2 = iv2[0]; - const u32x r3 = iv2[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = 
c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] 
= c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05600_a0.cu b/nv/m05600_a0.cu deleted file mode 100644 index eb1291b..0000000 --- a/nv/m05600_a0.cu +++ /dev/null @@ -1,844 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _NETNTLMV2_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define 
VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w4_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w5_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w6_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w7_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w8_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w9_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, wa_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wb_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, wc_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, wd_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, we_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wf_t, MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w4_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w8_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wc_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w1_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w5_t, 
MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w9_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wd_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w2_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w6_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wa_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, we_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w3_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w7_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wb_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wf_t, MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w8_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w4_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wc_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w2_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wa_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w6_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, we_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w1_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w9_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w5_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wd_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w3_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wb_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w7_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wf_t, MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; 
- u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, 
MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = 
w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] 
= 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off + 14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 
7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - 
hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop 
- */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off + 
14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 
3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, 
u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05600_a1.cu b/nv/m05600_a1.cu deleted file mode 100644 index d031a27..0000000 --- a/nv/m05600_a1.cu +++ /dev/null @@ -1,950 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _NETNTLMV2_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w4_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w5_t, MD4C00, 
MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w6_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w7_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w8_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w9_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, wa_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wb_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, wc_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, wd_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, we_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wf_t, MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w4_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w8_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wc_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w1_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w5_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w9_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wd_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w2_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w6_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wa_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, we_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w3_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w7_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wb_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wf_t, MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w8_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w4_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wc_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w2_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wa_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w6_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, we_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w1_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w9_t, MD4C02, MD4S21); - 
MD4_STEP (MD4_H , c, d, a, b, w5_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wd_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w3_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wb_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w7_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wf_t, MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, 
MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, 
MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x 
w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x 
wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - 
- u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off + 14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] 
= s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - 
- const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) 
return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - 
w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off + 14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; 
- w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, 
const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05600_a3.cu b/nv/m05600_a3.cu deleted file mode 100644 index 1a1e2e3..0000000 --- a/nv/m05600_a3.cu +++ /dev/null @@ -1,958 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _NETNTLMV2_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 
-#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w4_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w5_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w6_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w7_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w8_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w9_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, wa_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wb_t, MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, wc_t, MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, wd_t, MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, we_t, MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, wf_t, MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w4_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w8_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wc_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w1_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w5_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w9_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wd_t, MD4C01, MD4S13); - 
MD4_STEP (MD4_Go, a, b, c, d, w2_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w6_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wa_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, we_t, MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w3_t, MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w7_t, MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, wb_t, MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, wf_t, MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w8_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w4_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wc_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w2_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wa_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w6_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, we_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w1_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w9_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w5_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wd_t, MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w3_t, MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, wb_t, MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w7_t, MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, wf_t, MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, 
MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, 
MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] 
= w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m05600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_userdomain_buf[64], u32 s_chall_buf[256]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * prepare - */ - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0, w1, w2, w3, digest); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = 
s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off + 14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = 
s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m05600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, u32 s_userdomain_buf[64], u32 s_chall_buf[256]) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * 
digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * prepare - */ - - const u32 userdomain_len = netntlm_bufs[salt_pos].user_len - + netntlm_bufs[salt_pos].domain_len; - - const u32 chall_len = netntlm_bufs[salt_pos].srvchall_len - + netntlm_bufs[salt_pos].clichall_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x digest[4]; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0, w1, w2, w3, digest); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int left; - int off; - - for (left = userdomain_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = s_userdomain_buf[off 
+ 14]; - w3_t[3] = s_userdomain_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_userdomain_buf[off + 0]; - w0_t[1] = s_userdomain_buf[off + 1]; - w0_t[2] = s_userdomain_buf[off + 2]; - w0_t[3] = s_userdomain_buf[off + 3]; - w1_t[0] = s_userdomain_buf[off + 4]; - w1_t[1] = s_userdomain_buf[off + 5]; - w1_t[2] = s_userdomain_buf[off + 6]; - w1_t[3] = s_userdomain_buf[off + 7]; - w2_t[0] = s_userdomain_buf[off + 8]; - w2_t[1] = s_userdomain_buf[off + 9]; - w2_t[2] = s_userdomain_buf[off + 10]; - w2_t[3] = s_userdomain_buf[off + 11]; - w3_t[0] = s_userdomain_buf[off + 12]; - w3_t[1] = s_userdomain_buf[off + 13]; - w3_t[2] = (64 + userdomain_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - for (left = chall_len, off = 0; left >= 56; left -= 64, off += 16) - { - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off + 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = s_chall_buf[off + 14]; - w3_t[3] = s_chall_buf[off + 15]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = s_chall_buf[off + 0]; - w0_t[1] = s_chall_buf[off + 1]; - w0_t[2] = s_chall_buf[off + 2]; - w0_t[3] = s_chall_buf[off 
+ 3]; - w1_t[0] = s_chall_buf[off + 4]; - w1_t[1] = s_chall_buf[off + 5]; - w1_t[2] = s_chall_buf[off + 6]; - w1_t[3] = s_chall_buf[off + 7]; - w2_t[0] = s_chall_buf[off + 8]; - w2_t[1] = s_chall_buf[off + 9]; - w2_t[2] = s_chall_buf[off + 10]; - w2_t[3] = s_chall_buf[off + 11]; - w3_t[0] = s_chall_buf[off + 12]; - w3_t[1] = s_chall_buf[off + 13]; - w3_t[2] = (64 + chall_len) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[3]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 
s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, netntlm_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_userdomain_buf, s_chall_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - 
w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, netntlm_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_userdomain_buf, s_chall_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, netntlm_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_userdomain_buf, s_chall_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s08 
(const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - __shared__ u32 s_userdomain_buf[64]; - __shared__ u32 s_chall_buf[256]; - - if (lid < 64) - { - s_userdomain_buf[lid] = netntlm_bufs[salt_pos].userdomain_buf[lid]; - } - - s_chall_buf[lid] = netntlm_bufs[salt_pos].chall_buf[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m05600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, netntlm_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset, s_userdomain_buf, s_chall_buf); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const netntlm_t *netntlm_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m05800.cu b/nv/m05800.cu deleted file mode 100644 index ce52532..0000000 --- a/nv/m05800.cu +++ /dev/null @@ -1,729 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -typedef struct -{ - u32 dec; - u32 len; - -} entry_t; - -__device__ __constant__ entry_t pc[1024] = -{ - 0x00000030, 1, 0x00000031, 1, 0x00000032, 1, 0x00000033, 1, 0x00000034, 1, 0x00000035, 1, 0x00000036, 1, 0x00000037, 1, - 0x00000038, 1, 0x00000039, 1, 0x00003031, 2, 0x00003131, 2, 0x00003231, 2, 0x00003331, 2, 0x00003431, 2, 
0x00003531, 2, - 0x00003631, 2, 0x00003731, 2, 0x00003831, 2, 0x00003931, 2, 0x00003032, 2, 0x00003132, 2, 0x00003232, 2, 0x00003332, 2, - 0x00003432, 2, 0x00003532, 2, 0x00003632, 2, 0x00003732, 2, 0x00003832, 2, 0x00003932, 2, 0x00003033, 2, 0x00003133, 2, - 0x00003233, 2, 0x00003333, 2, 0x00003433, 2, 0x00003533, 2, 0x00003633, 2, 0x00003733, 2, 0x00003833, 2, 0x00003933, 2, - 0x00003034, 2, 0x00003134, 2, 0x00003234, 2, 0x00003334, 2, 0x00003434, 2, 0x00003534, 2, 0x00003634, 2, 0x00003734, 2, - 0x00003834, 2, 0x00003934, 2, 0x00003035, 2, 0x00003135, 2, 0x00003235, 2, 0x00003335, 2, 0x00003435, 2, 0x00003535, 2, - 0x00003635, 2, 0x00003735, 2, 0x00003835, 2, 0x00003935, 2, 0x00003036, 2, 0x00003136, 2, 0x00003236, 2, 0x00003336, 2, - 0x00003436, 2, 0x00003536, 2, 0x00003636, 2, 0x00003736, 2, 0x00003836, 2, 0x00003936, 2, 0x00003037, 2, 0x00003137, 2, - 0x00003237, 2, 0x00003337, 2, 0x00003437, 2, 0x00003537, 2, 0x00003637, 2, 0x00003737, 2, 0x00003837, 2, 0x00003937, 2, - 0x00003038, 2, 0x00003138, 2, 0x00003238, 2, 0x00003338, 2, 0x00003438, 2, 0x00003538, 2, 0x00003638, 2, 0x00003738, 2, - 0x00003838, 2, 0x00003938, 2, 0x00003039, 2, 0x00003139, 2, 0x00003239, 2, 0x00003339, 2, 0x00003439, 2, 0x00003539, 2, - 0x00003639, 2, 0x00003739, 2, 0x00003839, 2, 0x00003939, 2, 0x00303031, 3, 0x00313031, 3, 0x00323031, 3, 0x00333031, 3, - 0x00343031, 3, 0x00353031, 3, 0x00363031, 3, 0x00373031, 3, 0x00383031, 3, 0x00393031, 3, 0x00303131, 3, 0x00313131, 3, - 0x00323131, 3, 0x00333131, 3, 0x00343131, 3, 0x00353131, 3, 0x00363131, 3, 0x00373131, 3, 0x00383131, 3, 0x00393131, 3, - 0x00303231, 3, 0x00313231, 3, 0x00323231, 3, 0x00333231, 3, 0x00343231, 3, 0x00353231, 3, 0x00363231, 3, 0x00373231, 3, - 0x00383231, 3, 0x00393231, 3, 0x00303331, 3, 0x00313331, 3, 0x00323331, 3, 0x00333331, 3, 0x00343331, 3, 0x00353331, 3, - 0x00363331, 3, 0x00373331, 3, 0x00383331, 3, 0x00393331, 3, 0x00303431, 3, 0x00313431, 3, 0x00323431, 3, 0x00333431, 3, - 0x00343431, 3, 0x00353431, 3, 
0x00363431, 3, 0x00373431, 3, 0x00383431, 3, 0x00393431, 3, 0x00303531, 3, 0x00313531, 3, - 0x00323531, 3, 0x00333531, 3, 0x00343531, 3, 0x00353531, 3, 0x00363531, 3, 0x00373531, 3, 0x00383531, 3, 0x00393531, 3, - 0x00303631, 3, 0x00313631, 3, 0x00323631, 3, 0x00333631, 3, 0x00343631, 3, 0x00353631, 3, 0x00363631, 3, 0x00373631, 3, - 0x00383631, 3, 0x00393631, 3, 0x00303731, 3, 0x00313731, 3, 0x00323731, 3, 0x00333731, 3, 0x00343731, 3, 0x00353731, 3, - 0x00363731, 3, 0x00373731, 3, 0x00383731, 3, 0x00393731, 3, 0x00303831, 3, 0x00313831, 3, 0x00323831, 3, 0x00333831, 3, - 0x00343831, 3, 0x00353831, 3, 0x00363831, 3, 0x00373831, 3, 0x00383831, 3, 0x00393831, 3, 0x00303931, 3, 0x00313931, 3, - 0x00323931, 3, 0x00333931, 3, 0x00343931, 3, 0x00353931, 3, 0x00363931, 3, 0x00373931, 3, 0x00383931, 3, 0x00393931, 3, - 0x00303032, 3, 0x00313032, 3, 0x00323032, 3, 0x00333032, 3, 0x00343032, 3, 0x00353032, 3, 0x00363032, 3, 0x00373032, 3, - 0x00383032, 3, 0x00393032, 3, 0x00303132, 3, 0x00313132, 3, 0x00323132, 3, 0x00333132, 3, 0x00343132, 3, 0x00353132, 3, - 0x00363132, 3, 0x00373132, 3, 0x00383132, 3, 0x00393132, 3, 0x00303232, 3, 0x00313232, 3, 0x00323232, 3, 0x00333232, 3, - 0x00343232, 3, 0x00353232, 3, 0x00363232, 3, 0x00373232, 3, 0x00383232, 3, 0x00393232, 3, 0x00303332, 3, 0x00313332, 3, - 0x00323332, 3, 0x00333332, 3, 0x00343332, 3, 0x00353332, 3, 0x00363332, 3, 0x00373332, 3, 0x00383332, 3, 0x00393332, 3, - 0x00303432, 3, 0x00313432, 3, 0x00323432, 3, 0x00333432, 3, 0x00343432, 3, 0x00353432, 3, 0x00363432, 3, 0x00373432, 3, - 0x00383432, 3, 0x00393432, 3, 0x00303532, 3, 0x00313532, 3, 0x00323532, 3, 0x00333532, 3, 0x00343532, 3, 0x00353532, 3, - 0x00363532, 3, 0x00373532, 3, 0x00383532, 3, 0x00393532, 3, 0x00303632, 3, 0x00313632, 3, 0x00323632, 3, 0x00333632, 3, - 0x00343632, 3, 0x00353632, 3, 0x00363632, 3, 0x00373632, 3, 0x00383632, 3, 0x00393632, 3, 0x00303732, 3, 0x00313732, 3, - 0x00323732, 3, 0x00333732, 3, 0x00343732, 3, 0x00353732, 3, 0x00363732, 3, 
0x00373732, 3, 0x00383732, 3, 0x00393732, 3, - 0x00303832, 3, 0x00313832, 3, 0x00323832, 3, 0x00333832, 3, 0x00343832, 3, 0x00353832, 3, 0x00363832, 3, 0x00373832, 3, - 0x00383832, 3, 0x00393832, 3, 0x00303932, 3, 0x00313932, 3, 0x00323932, 3, 0x00333932, 3, 0x00343932, 3, 0x00353932, 3, - 0x00363932, 3, 0x00373932, 3, 0x00383932, 3, 0x00393932, 3, 0x00303033, 3, 0x00313033, 3, 0x00323033, 3, 0x00333033, 3, - 0x00343033, 3, 0x00353033, 3, 0x00363033, 3, 0x00373033, 3, 0x00383033, 3, 0x00393033, 3, 0x00303133, 3, 0x00313133, 3, - 0x00323133, 3, 0x00333133, 3, 0x00343133, 3, 0x00353133, 3, 0x00363133, 3, 0x00373133, 3, 0x00383133, 3, 0x00393133, 3, - 0x00303233, 3, 0x00313233, 3, 0x00323233, 3, 0x00333233, 3, 0x00343233, 3, 0x00353233, 3, 0x00363233, 3, 0x00373233, 3, - 0x00383233, 3, 0x00393233, 3, 0x00303333, 3, 0x00313333, 3, 0x00323333, 3, 0x00333333, 3, 0x00343333, 3, 0x00353333, 3, - 0x00363333, 3, 0x00373333, 3, 0x00383333, 3, 0x00393333, 3, 0x00303433, 3, 0x00313433, 3, 0x00323433, 3, 0x00333433, 3, - 0x00343433, 3, 0x00353433, 3, 0x00363433, 3, 0x00373433, 3, 0x00383433, 3, 0x00393433, 3, 0x00303533, 3, 0x00313533, 3, - 0x00323533, 3, 0x00333533, 3, 0x00343533, 3, 0x00353533, 3, 0x00363533, 3, 0x00373533, 3, 0x00383533, 3, 0x00393533, 3, - 0x00303633, 3, 0x00313633, 3, 0x00323633, 3, 0x00333633, 3, 0x00343633, 3, 0x00353633, 3, 0x00363633, 3, 0x00373633, 3, - 0x00383633, 3, 0x00393633, 3, 0x00303733, 3, 0x00313733, 3, 0x00323733, 3, 0x00333733, 3, 0x00343733, 3, 0x00353733, 3, - 0x00363733, 3, 0x00373733, 3, 0x00383733, 3, 0x00393733, 3, 0x00303833, 3, 0x00313833, 3, 0x00323833, 3, 0x00333833, 3, - 0x00343833, 3, 0x00353833, 3, 0x00363833, 3, 0x00373833, 3, 0x00383833, 3, 0x00393833, 3, 0x00303933, 3, 0x00313933, 3, - 0x00323933, 3, 0x00333933, 3, 0x00343933, 3, 0x00353933, 3, 0x00363933, 3, 0x00373933, 3, 0x00383933, 3, 0x00393933, 3, - 0x00303034, 3, 0x00313034, 3, 0x00323034, 3, 0x00333034, 3, 0x00343034, 3, 0x00353034, 3, 0x00363034, 3, 0x00373034, 3, - 
0x00383034, 3, 0x00393034, 3, 0x00303134, 3, 0x00313134, 3, 0x00323134, 3, 0x00333134, 3, 0x00343134, 3, 0x00353134, 3, - 0x00363134, 3, 0x00373134, 3, 0x00383134, 3, 0x00393134, 3, 0x00303234, 3, 0x00313234, 3, 0x00323234, 3, 0x00333234, 3, - 0x00343234, 3, 0x00353234, 3, 0x00363234, 3, 0x00373234, 3, 0x00383234, 3, 0x00393234, 3, 0x00303334, 3, 0x00313334, 3, - 0x00323334, 3, 0x00333334, 3, 0x00343334, 3, 0x00353334, 3, 0x00363334, 3, 0x00373334, 3, 0x00383334, 3, 0x00393334, 3, - 0x00303434, 3, 0x00313434, 3, 0x00323434, 3, 0x00333434, 3, 0x00343434, 3, 0x00353434, 3, 0x00363434, 3, 0x00373434, 3, - 0x00383434, 3, 0x00393434, 3, 0x00303534, 3, 0x00313534, 3, 0x00323534, 3, 0x00333534, 3, 0x00343534, 3, 0x00353534, 3, - 0x00363534, 3, 0x00373534, 3, 0x00383534, 3, 0x00393534, 3, 0x00303634, 3, 0x00313634, 3, 0x00323634, 3, 0x00333634, 3, - 0x00343634, 3, 0x00353634, 3, 0x00363634, 3, 0x00373634, 3, 0x00383634, 3, 0x00393634, 3, 0x00303734, 3, 0x00313734, 3, - 0x00323734, 3, 0x00333734, 3, 0x00343734, 3, 0x00353734, 3, 0x00363734, 3, 0x00373734, 3, 0x00383734, 3, 0x00393734, 3, - 0x00303834, 3, 0x00313834, 3, 0x00323834, 3, 0x00333834, 3, 0x00343834, 3, 0x00353834, 3, 0x00363834, 3, 0x00373834, 3, - 0x00383834, 3, 0x00393834, 3, 0x00303934, 3, 0x00313934, 3, 0x00323934, 3, 0x00333934, 3, 0x00343934, 3, 0x00353934, 3, - 0x00363934, 3, 0x00373934, 3, 0x00383934, 3, 0x00393934, 3, 0x00303035, 3, 0x00313035, 3, 0x00323035, 3, 0x00333035, 3, - 0x00343035, 3, 0x00353035, 3, 0x00363035, 3, 0x00373035, 3, 0x00383035, 3, 0x00393035, 3, 0x00303135, 3, 0x00313135, 3, - 0x00323135, 3, 0x00333135, 3, 0x00343135, 3, 0x00353135, 3, 0x00363135, 3, 0x00373135, 3, 0x00383135, 3, 0x00393135, 3, - 0x00303235, 3, 0x00313235, 3, 0x00323235, 3, 0x00333235, 3, 0x00343235, 3, 0x00353235, 3, 0x00363235, 3, 0x00373235, 3, - 0x00383235, 3, 0x00393235, 3, 0x00303335, 3, 0x00313335, 3, 0x00323335, 3, 0x00333335, 3, 0x00343335, 3, 0x00353335, 3, - 0x00363335, 3, 0x00373335, 3, 0x00383335, 3, 
0x00393335, 3, 0x00303435, 3, 0x00313435, 3, 0x00323435, 3, 0x00333435, 3, - 0x00343435, 3, 0x00353435, 3, 0x00363435, 3, 0x00373435, 3, 0x00383435, 3, 0x00393435, 3, 0x00303535, 3, 0x00313535, 3, - 0x00323535, 3, 0x00333535, 3, 0x00343535, 3, 0x00353535, 3, 0x00363535, 3, 0x00373535, 3, 0x00383535, 3, 0x00393535, 3, - 0x00303635, 3, 0x00313635, 3, 0x00323635, 3, 0x00333635, 3, 0x00343635, 3, 0x00353635, 3, 0x00363635, 3, 0x00373635, 3, - 0x00383635, 3, 0x00393635, 3, 0x00303735, 3, 0x00313735, 3, 0x00323735, 3, 0x00333735, 3, 0x00343735, 3, 0x00353735, 3, - 0x00363735, 3, 0x00373735, 3, 0x00383735, 3, 0x00393735, 3, 0x00303835, 3, 0x00313835, 3, 0x00323835, 3, 0x00333835, 3, - 0x00343835, 3, 0x00353835, 3, 0x00363835, 3, 0x00373835, 3, 0x00383835, 3, 0x00393835, 3, 0x00303935, 3, 0x00313935, 3, - 0x00323935, 3, 0x00333935, 3, 0x00343935, 3, 0x00353935, 3, 0x00363935, 3, 0x00373935, 3, 0x00383935, 3, 0x00393935, 3, - 0x00303036, 3, 0x00313036, 3, 0x00323036, 3, 0x00333036, 3, 0x00343036, 3, 0x00353036, 3, 0x00363036, 3, 0x00373036, 3, - 0x00383036, 3, 0x00393036, 3, 0x00303136, 3, 0x00313136, 3, 0x00323136, 3, 0x00333136, 3, 0x00343136, 3, 0x00353136, 3, - 0x00363136, 3, 0x00373136, 3, 0x00383136, 3, 0x00393136, 3, 0x00303236, 3, 0x00313236, 3, 0x00323236, 3, 0x00333236, 3, - 0x00343236, 3, 0x00353236, 3, 0x00363236, 3, 0x00373236, 3, 0x00383236, 3, 0x00393236, 3, 0x00303336, 3, 0x00313336, 3, - 0x00323336, 3, 0x00333336, 3, 0x00343336, 3, 0x00353336, 3, 0x00363336, 3, 0x00373336, 3, 0x00383336, 3, 0x00393336, 3, - 0x00303436, 3, 0x00313436, 3, 0x00323436, 3, 0x00333436, 3, 0x00343436, 3, 0x00353436, 3, 0x00363436, 3, 0x00373436, 3, - 0x00383436, 3, 0x00393436, 3, 0x00303536, 3, 0x00313536, 3, 0x00323536, 3, 0x00333536, 3, 0x00343536, 3, 0x00353536, 3, - 0x00363536, 3, 0x00373536, 3, 0x00383536, 3, 0x00393536, 3, 0x00303636, 3, 0x00313636, 3, 0x00323636, 3, 0x00333636, 3, - 0x00343636, 3, 0x00353636, 3, 0x00363636, 3, 0x00373636, 3, 0x00383636, 3, 0x00393636, 3, 
0x00303736, 3, 0x00313736, 3, - 0x00323736, 3, 0x00333736, 3, 0x00343736, 3, 0x00353736, 3, 0x00363736, 3, 0x00373736, 3, 0x00383736, 3, 0x00393736, 3, - 0x00303836, 3, 0x00313836, 3, 0x00323836, 3, 0x00333836, 3, 0x00343836, 3, 0x00353836, 3, 0x00363836, 3, 0x00373836, 3, - 0x00383836, 3, 0x00393836, 3, 0x00303936, 3, 0x00313936, 3, 0x00323936, 3, 0x00333936, 3, 0x00343936, 3, 0x00353936, 3, - 0x00363936, 3, 0x00373936, 3, 0x00383936, 3, 0x00393936, 3, 0x00303037, 3, 0x00313037, 3, 0x00323037, 3, 0x00333037, 3, - 0x00343037, 3, 0x00353037, 3, 0x00363037, 3, 0x00373037, 3, 0x00383037, 3, 0x00393037, 3, 0x00303137, 3, 0x00313137, 3, - 0x00323137, 3, 0x00333137, 3, 0x00343137, 3, 0x00353137, 3, 0x00363137, 3, 0x00373137, 3, 0x00383137, 3, 0x00393137, 3, - 0x00303237, 3, 0x00313237, 3, 0x00323237, 3, 0x00333237, 3, 0x00343237, 3, 0x00353237, 3, 0x00363237, 3, 0x00373237, 3, - 0x00383237, 3, 0x00393237, 3, 0x00303337, 3, 0x00313337, 3, 0x00323337, 3, 0x00333337, 3, 0x00343337, 3, 0x00353337, 3, - 0x00363337, 3, 0x00373337, 3, 0x00383337, 3, 0x00393337, 3, 0x00303437, 3, 0x00313437, 3, 0x00323437, 3, 0x00333437, 3, - 0x00343437, 3, 0x00353437, 3, 0x00363437, 3, 0x00373437, 3, 0x00383437, 3, 0x00393437, 3, 0x00303537, 3, 0x00313537, 3, - 0x00323537, 3, 0x00333537, 3, 0x00343537, 3, 0x00353537, 3, 0x00363537, 3, 0x00373537, 3, 0x00383537, 3, 0x00393537, 3, - 0x00303637, 3, 0x00313637, 3, 0x00323637, 3, 0x00333637, 3, 0x00343637, 3, 0x00353637, 3, 0x00363637, 3, 0x00373637, 3, - 0x00383637, 3, 0x00393637, 3, 0x00303737, 3, 0x00313737, 3, 0x00323737, 3, 0x00333737, 3, 0x00343737, 3, 0x00353737, 3, - 0x00363737, 3, 0x00373737, 3, 0x00383737, 3, 0x00393737, 3, 0x00303837, 3, 0x00313837, 3, 0x00323837, 3, 0x00333837, 3, - 0x00343837, 3, 0x00353837, 3, 0x00363837, 3, 0x00373837, 3, 0x00383837, 3, 0x00393837, 3, 0x00303937, 3, 0x00313937, 3, - 0x00323937, 3, 0x00333937, 3, 0x00343937, 3, 0x00353937, 3, 0x00363937, 3, 0x00373937, 3, 0x00383937, 3, 0x00393937, 3, - 0x00303038, 3, 
0x00313038, 3, 0x00323038, 3, 0x00333038, 3, 0x00343038, 3, 0x00353038, 3, 0x00363038, 3, 0x00373038, 3, - 0x00383038, 3, 0x00393038, 3, 0x00303138, 3, 0x00313138, 3, 0x00323138, 3, 0x00333138, 3, 0x00343138, 3, 0x00353138, 3, - 0x00363138, 3, 0x00373138, 3, 0x00383138, 3, 0x00393138, 3, 0x00303238, 3, 0x00313238, 3, 0x00323238, 3, 0x00333238, 3, - 0x00343238, 3, 0x00353238, 3, 0x00363238, 3, 0x00373238, 3, 0x00383238, 3, 0x00393238, 3, 0x00303338, 3, 0x00313338, 3, - 0x00323338, 3, 0x00333338, 3, 0x00343338, 3, 0x00353338, 3, 0x00363338, 3, 0x00373338, 3, 0x00383338, 3, 0x00393338, 3, - 0x00303438, 3, 0x00313438, 3, 0x00323438, 3, 0x00333438, 3, 0x00343438, 3, 0x00353438, 3, 0x00363438, 3, 0x00373438, 3, - 0x00383438, 3, 0x00393438, 3, 0x00303538, 3, 0x00313538, 3, 0x00323538, 3, 0x00333538, 3, 0x00343538, 3, 0x00353538, 3, - 0x00363538, 3, 0x00373538, 3, 0x00383538, 3, 0x00393538, 3, 0x00303638, 3, 0x00313638, 3, 0x00323638, 3, 0x00333638, 3, - 0x00343638, 3, 0x00353638, 3, 0x00363638, 3, 0x00373638, 3, 0x00383638, 3, 0x00393638, 3, 0x00303738, 3, 0x00313738, 3, - 0x00323738, 3, 0x00333738, 3, 0x00343738, 3, 0x00353738, 3, 0x00363738, 3, 0x00373738, 3, 0x00383738, 3, 0x00393738, 3, - 0x00303838, 3, 0x00313838, 3, 0x00323838, 3, 0x00333838, 3, 0x00343838, 3, 0x00353838, 3, 0x00363838, 3, 0x00373838, 3, - 0x00383838, 3, 0x00393838, 3, 0x00303938, 3, 0x00313938, 3, 0x00323938, 3, 0x00333938, 3, 0x00343938, 3, 0x00353938, 3, - 0x00363938, 3, 0x00373938, 3, 0x00383938, 3, 0x00393938, 3, 0x00303039, 3, 0x00313039, 3, 0x00323039, 3, 0x00333039, 3, - 0x00343039, 3, 0x00353039, 3, 0x00363039, 3, 0x00373039, 3, 0x00383039, 3, 0x00393039, 3, 0x00303139, 3, 0x00313139, 3, - 0x00323139, 3, 0x00333139, 3, 0x00343139, 3, 0x00353139, 3, 0x00363139, 3, 0x00373139, 3, 0x00383139, 3, 0x00393139, 3, - 0x00303239, 3, 0x00313239, 3, 0x00323239, 3, 0x00333239, 3, 0x00343239, 3, 0x00353239, 3, 0x00363239, 3, 0x00373239, 3, - 0x00383239, 3, 0x00393239, 3, 0x00303339, 3, 0x00313339, 3, 
0x00323339, 3, 0x00333339, 3, 0x00343339, 3, 0x00353339, 3, - 0x00363339, 3, 0x00373339, 3, 0x00383339, 3, 0x00393339, 3, 0x00303439, 3, 0x00313439, 3, 0x00323439, 3, 0x00333439, 3, - 0x00343439, 3, 0x00353439, 3, 0x00363439, 3, 0x00373439, 3, 0x00383439, 3, 0x00393439, 3, 0x00303539, 3, 0x00313539, 3, - 0x00323539, 3, 0x00333539, 3, 0x00343539, 3, 0x00353539, 3, 0x00363539, 3, 0x00373539, 3, 0x00383539, 3, 0x00393539, 3, - 0x00303639, 3, 0x00313639, 3, 0x00323639, 3, 0x00333639, 3, 0x00343639, 3, 0x00353639, 3, 0x00363639, 3, 0x00373639, 3, - 0x00383639, 3, 0x00393639, 3, 0x00303739, 3, 0x00313739, 3, 0x00323739, 3, 0x00333739, 3, 0x00343739, 3, 0x00353739, 3, - 0x00363739, 3, 0x00373739, 3, 0x00383739, 3, 0x00393739, 3, 0x00303839, 3, 0x00313839, 3, 0x00323839, 3, 0x00333839, 3, - 0x00343839, 3, 0x00353839, 3, 0x00363839, 3, 0x00373839, 3, 0x00383839, 3, 0x00393839, 3, 0x00303939, 3, 0x00313939, 3, - 0x00323939, 3, 0x00333939, 3, 0x00343939, 3, 0x00353939, 3, 0x00363939, 3, 0x00373939, 3, 0x00383939, 3, 0x00393939, 3, - 0x30303031, 4, 0x31303031, 4, 0x32303031, 4, 0x33303031, 4, 0x34303031, 4, 0x35303031, 4, 0x36303031, 4, 0x37303031, 4, - 0x38303031, 4, 0x39303031, 4, 0x30313031, 4, 0x31313031, 4, 0x32313031, 4, 0x33313031, 4, 0x34313031, 4, 0x35313031, 4, - 0x36313031, 4, 0x37313031, 4, 0x38313031, 4, 0x39313031, 4, 0x30323031, 4, 0x31323031, 4, 0x32323031, 4, 0x33323031, 4, -}; - -__device__ static void append_word (u32x w0[4], u32x w1[4], const u32x append[4], const u32 offset) -{ - switch (offset) - { - case 1: - w0[0] = w0[0] | append[0] << 8; - w0[1] = append[0] >> 24 | append[1] << 8; - w0[2] = append[1] >> 24 | append[2] << 8; - w0[3] = append[2] >> 24 | append[3] << 8; - break; - - case 2: - w0[0] = w0[0] | append[0] << 16; - w0[1] = append[0] >> 16 | append[1] << 16; - w0[2] = append[1] >> 16 | append[2] << 16; - w0[3] = append[2] >> 16 | append[3] << 16; - break; - - case 3: - w0[0] = w0[0] | append[0] << 24; - w0[1] = append[0] >> 8 | append[1] << 
24; - w0[2] = append[1] >> 8 | append[2] << 24; - w0[3] = append[2] >> 8 | append[3] << 24; - break; - - case 4: - w0[1] = append[0]; - w0[2] = append[1]; - w0[3] = append[2]; - w1[0] = append[3]; - break; - } -} - -__device__ static void append_salt (u32x w0[4], u32x w1[4], u32x w2[4], const u32 append[5], const u32 offset) -{ - switch (offset) - { - case 2: - w0[0] = w0[0] | append[0] << 16; - w0[1] = append[0] >> 16 | append[1] << 16; - w0[2] = append[1] >> 16 | append[2] << 16; - w0[3] = append[2] >> 16 | append[3] << 16; - w1[0] = append[3] >> 16 | append[4] << 16; - w1[1] = append[4] >> 16; - break; - - case 3: - w0[0] = w0[0] | append[0] << 24; - w0[1] = append[0] >> 8 | append[1] << 24; - w0[2] = append[1] >> 8 | append[2] << 24; - w0[3] = append[2] >> 8 | append[3] << 24; - w1[0] = append[3] >> 8 | append[4] << 24; - w1[1] = append[4] >> 8; - break; - - case 4: - w0[1] = append[0]; - w0[2] = append[1]; - w0[3] = append[2]; - w1[0] = append[3]; - w1[1] = append[4]; - break; - - case 5: - w0[1] = w0[1] | append[0] << 8; - w0[2] = append[0] >> 24 | append[1] << 8; - w0[3] = append[1] >> 24 | append[2] << 8; - w1[0] = append[2] >> 24 | append[3] << 8; - w1[1] = append[3] >> 24 | append[4] << 8; - w1[2] = append[4] >> 24; - break; - - case 6: - w0[1] = w0[1] | append[0] << 16; - w0[2] = append[0] >> 16 | append[1] << 16; - w0[3] = append[1] >> 16 | append[2] << 16; - w1[0] = append[2] >> 16 | append[3] << 16; - w1[1] = append[3] >> 16 | append[4] << 16; - w1[2] = append[4] >> 16; - break; - - case 7: - w0[1] = w0[1] | append[0] << 24; - w0[2] = append[0] >> 8 | append[1] << 24; - w0[3] = append[1] >> 8 | append[2] << 24; - w1[0] = append[2] >> 8 | append[3] << 24; - w1[1] = append[3] >> 8 | append[4] << 24; - w1[2] = append[4] >> 8; - break; - - case 8: - w0[2] = append[0]; - w0[3] = append[1]; - w1[0] = append[2]; - w1[1] = append[3]; - w1[2] = append[4]; - break; - - case 9: - w0[2] = w0[2] | append[0] << 8; - w0[3] = append[0] >> 24 | append[1] << 8; - w1[0] 
= append[1] >> 24 | append[2] << 8; - w1[1] = append[2] >> 24 | append[3] << 8; - w1[2] = append[3] >> 24 | append[4] << 8; - w1[3] = append[4] >> 24; - break; - - case 10: - w0[2] = w0[2] | append[0] << 16; - w0[3] = append[0] >> 16 | append[1] << 16; - w1[0] = append[1] >> 16 | append[2] << 16; - w1[1] = append[2] >> 16 | append[3] << 16; - w1[2] = append[3] >> 16 | append[4] << 16; - w1[3] = append[4] >> 16; - break; - - case 11: - w0[2] = w0[2] | append[0] << 24; - w0[3] = append[0] >> 8 | append[1] << 24; - w1[0] = append[1] >> 8 | append[2] << 24; - w1[1] = append[2] >> 8 | append[3] << 24; - w1[2] = append[3] >> 8 | append[4] << 24; - w1[3] = append[4] >> 8; - break; - - case 12: - w0[3] = append[0]; - w1[0] = append[1]; - w1[1] = append[2]; - w1[2] = append[3]; - w1[3] = append[4]; - break; - - case 13: - w0[3] = w0[3] | append[0] << 8; - w1[0] = append[0] >> 24 | append[1] << 8; - w1[1] = append[1] >> 24 | append[2] << 8; - w1[2] = append[2] >> 24 | append[3] << 8; - w1[3] = append[3] >> 24 | append[4] << 8; - w2[0] = append[4] >> 24; - break; - - case 14: - w0[3] = w0[3] | append[0] << 16; - w1[0] = append[0] >> 16 | append[1] << 16; - w1[1] = append[1] >> 16 | append[2] << 16; - w1[2] = append[2] >> 16 | append[3] << 16; - w1[3] = append[3] >> 16 | append[4] << 16; - w2[0] = append[4] >> 16; - break; - - case 15: - w0[3] = w0[3] | append[0] << 24; - w1[0] = append[0] >> 8 | append[1] << 24; - w1[1] = append[1] >> 8 | append[2] << 24; - w1[2] = append[2] >> 8 | append[3] << 24; - w1[3] = append[3] >> 8 | append[4] << 24; - w2[0] = append[4] >> 8; - break; - - case 16: - w1[0] = append[0]; - w1[1] = append[1]; - w1[2] = append[2]; - w1[3] = append[3]; - w2[0] = append[4]; - break; - - case 17: - w1[0] = w1[0] | append[0] << 8; - w1[1] = append[0] >> 24 | append[1] << 8; - w1[2] = append[1] >> 24 | append[2] << 8; - w1[3] = append[2] >> 24 | append[3] << 8; - w2[0] = append[3] >> 24 | append[4] << 8; - w2[1] = append[4] >> 24; - break; - - case 18: - w1[0] 
= w1[0] | append[0] << 16; - w1[1] = append[0] >> 16 | append[1] << 16; - w1[2] = append[1] >> 16 | append[2] << 16; - w1[3] = append[2] >> 16 | append[3] << 16; - w2[0] = append[3] >> 16 | append[4] << 16; - w2[1] = append[4] >> 16; - break; - - case 19: - w1[0] = w1[0] | append[0] << 24; - w1[1] = append[0] >> 8 | append[1] << 24; - w1[2] = append[1] >> 8 | append[2] << 24; - w1[3] = append[2] >> 8 | append[3] << 24; - w2[0] = append[3] >> 8 | append[4] << 24; - w2[1] = append[4] >> 8; - break; - } -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP 
(SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP 
(SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - 
digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidpin_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x word_buf[4]; - - word_buf[0] = pws[gid].i[ 0]; - word_buf[1] = pws[gid].i[ 1]; - word_buf[2] = pws[gid].i[ 2]; - word_buf[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - /** - * init - */ - - const u32 pc_len = 1; - const u32 pc_dec = 0x30; - - u32x data0[4] = { 0, 0, 0, 0 }; - u32x data1[4] = { 0, 0, 0, 0 }; - u32x data2[4] = { 0, 0, 0, 0 }; - - data0[0] = pc_dec; - - append_word (data0, data1, word_buf, pc_len); - - append_salt (data0, data1, data2, salt_buf, pc_len + pw_len); - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = swap_workaround (data0[0]); - w0[1] = swap_workaround (data0[1]); - w0[2] = swap_workaround 
(data0[2]); - w0[3] = swap_workaround (data0[3]); - w1[0] = swap_workaround (data1[0]); - w1[1] = swap_workaround (data1[1]); - w1[2] = swap_workaround (data1[2]); - w1[3] = swap_workaround (data1[3]); - w2[0] = swap_workaround (data2[0]); - w2[1] = swap_workaround (data2[1]); - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (pc_len + pw_len + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidpin_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x word_buf[4]; - - word_buf[0] = pws[gid].i[ 0]; - word_buf[1] = pws[gid].i[ 1]; - word_buf[2] = pws[gid].i[ 2]; - word_buf[3] = pws[gid].i[ 3]; - - const u32 pw_len = pws[gid].pw_len; - - u32x digest[5]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = 
tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - digest[4] = tmps[gid].digest_buf[4]; - - /** - * cache precomputed conversion table in shared memory - */ - - const u32 lid = threadIdx.x; - - __shared__ entry_t s_pc[1024]; - - const u32 lid4 = lid * 4; - - s_pc[lid4 + 0] = pc[lid4 + 0]; - s_pc[lid4 + 1] = pc[lid4 + 1]; - s_pc[lid4 + 2] = pc[lid4 + 2]; - s_pc[lid4 + 3] = pc[lid4 + 3]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[5]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - - /** - * loop - */ - - for (u32 i = 0, j = loop_pos + 1; i < loop_cnt; i++, j++) - { - const u32 pc_len = s_pc[j].len; - const u32 pc_dec = s_pc[j].dec; - - u32x data0[4] = { 0, 0, 0, 0 }; - u32x data1[4] = { 0, 0, 0, 0 }; - u32x data2[4] = { 0, 0, 0, 0 }; - - data0[0] = pc_dec; - - append_word (data0, data1, word_buf, pc_len); - - append_salt (data0, data1, data2, salt_buf, pc_len + pw_len); - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = swap_workaround (data0[0]); - w1[2] = swap_workaround (data0[1]); - w1[3] = swap_workaround (data0[2]); - w2[0] = swap_workaround (data0[3]); - w2[1] = swap_workaround (data1[0]); - w2[2] = swap_workaround (data1[1]); - w2[3] = swap_workaround (data1[2]); - w3[0] = swap_workaround (data1[3]); - w3[1] = swap_workaround (data2[0]); - w3[2] = 0; - w3[3] = (20 + pc_len + pw_len + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = 
digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m05800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidpin_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06000_a0.cu b/nv/m06000_a0.cu deleted file mode 100644 index 1fe235e..0000000 --- a/nv/m06000_a0.cu +++ /dev/null @@ -1,482 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - 
-#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[14], RIPEMD160C00, RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, 
b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP 
(RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, 
RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - 
RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], 
RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP 
(RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, 
RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = out_len * 8; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = out_len * 8; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = 
dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06000_a1.cu b/nv/m06000_a1.cu deleted file mode 100644 index 44e5c4b..0000000 --- a/nv/m06000_a1.cu +++ /dev/null @@ -1,588 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include 
"include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F 
, b1, c1, d1, e1, a1, w[14], RIPEMD160C00, RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - 
RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], 
RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, 
RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, 
b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP 
(RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, 
RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; 
- w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = pw_len * 8; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m16 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - 
wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | 
wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = pw_len * 8; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, 
u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06000_a3.cu b/nv/m06000_a3.cu deleted file mode 100644 index 66fb938..0000000 --- a/nv/m06000_a3.cu +++ /dev/null @@ -1,660 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - 
RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[14], RIPEMD160C00, RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], 
RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP 
(RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, 
RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - 
RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], 
RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, 
a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - 
-__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m06000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 w14 = pw_len * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = w2[0]; - wl[ 9] = w2[1]; - wl[10] = w2[2]; - wl[11] = w2[3]; - wl[12] = w3[0]; - wl[13] = w3[1]; - wl[14] = w14; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m06000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 w14 = pw_len * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = w2[0]; - wl[ 9] = w2[1]; - wl[10] = w2[2]; - wl[11] = w2[3]; - wl[12] = w3[0]; - wl[13] = w3[1]; - wl[14] = w14; - wl[15] = 0; - - u32x dgst[5]; - - dgst[0] = RIPEMD160M_A; - dgst[1] = RIPEMD160M_B; - dgst[2] = RIPEMD160M_C; - dgst[3] = RIPEMD160M_D; - dgst[4] = RIPEMD160M_E; - - ripemd160_transform (wl, dgst); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - 
*/ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = 
pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m06000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m06100_a0.cu b/nv/m06100_a0.cu deleted file mode 100644 index 36a16b2..0000000 --- a/nv/m06100_a0.cu +++ /dev/null @@ -1,1619 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define R 10 - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ static u32 Ch[8][256] = -{ - { - 0x18186018, 
0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 
0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 
0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 
0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 
0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 
0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 
0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 
0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 
0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 
0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 
0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 
0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 
0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ static u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 
0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 
0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 
0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 
0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 
0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 
0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 
0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 
0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 
0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 
0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 
0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 
0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 
0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -__device__ static void whirlpool_transform (const u32x w[16], u32x dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32x Kh[8]; - u32x Kl[8]; - - Kh[0] 
= 0x300beec0; - Kl[0] = 0xaf902967; - Kh[1] = 0x28282828; - Kl[1] = 0x28282828; - Kh[2] = 0x28282828; - Kl[2] = 0x28282828; - Kh[3] = 0x28282828; - Kl[3] = 0x28282828; - Kh[4] = 0x28282828; - Kl[4] = 0x28282828; - Kh[5] = 0x28282828; - Kl[5] = 0x28282828; - Kh[6] = 0x28282828; - Kl[6] = 0x28282828; - Kh[7] = 0x28282828; - Kl[7] = 0x28282828; - - u32x stateh[8]; - u32x statel[8]; - - stateh[0] = w[ 0]; - statel[0] = w[ 1]; - stateh[1] = w[ 2]; - statel[1] = w[ 3]; - stateh[2] = w[ 4]; - statel[2] = w[ 5]; - stateh[3] = w[ 6]; - statel[3] = w[ 7]; - stateh[4] = w[ 8]; - statel[4] = w[ 9]; - stateh[5] = w[10]; - statel[5] = w[11]; - stateh[6] = w[12]; - statel[6] = w[13]; - stateh[7] = w[14]; - statel[7] = w[15]; - - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - 
statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - - for (int r = 2; r <= R; r++) - { - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) & 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ 
BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] = stateh[0] ^ w[ 0]; - dgst[ 1] = statel[0] ^ w[ 1]; - dgst[ 2] = stateh[1] ^ w[ 2]; - dgst[ 3] = statel[1] ^ w[ 3]; - dgst[ 4] = stateh[2] ^ w[ 4]; - dgst[ 5] = statel[2] ^ w[ 5]; - dgst[ 6] = stateh[3] ^ w[ 6]; - dgst[ 7] = statel[3] ^ w[ 7]; - dgst[ 8] = stateh[4] ^ w[ 8]; - dgst[ 9] = statel[4] ^ w[ 9]; - dgst[10] = stateh[5] ^ w[10]; - dgst[11] = statel[5] ^ w[11]; - dgst[12] = stateh[6] ^ w[12]; - dgst[13] = statel[6] ^ w[13]; - dgst[14] = stateh[7] ^ w[14]; - dgst[15] = statel[7] ^ w[15]; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x wl[16]; - - wl[ 0] = swap_workaround (w0[0]); - wl[ 1] = swap_workaround (w0[1]); - wl[ 2] = swap_workaround (w0[2]); - wl[ 3] = swap_workaround (w0[3]); - wl[ 4] = swap_workaround (w1[0]); - wl[ 5] = swap_workaround (w1[1]); - wl[ 6] = swap_workaround (w1[2]); - wl[ 7] = swap_workaround (w1[3]); - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = 0; - wl[15] = out_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = 
dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - 
w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x wl[16]; - - wl[ 0] = swap_workaround (w0[0]); - wl[ 1] = swap_workaround (w0[1]); - wl[ 2] = swap_workaround (w0[2]); - wl[ 3] = swap_workaround (w0[3]); - wl[ 4] = swap_workaround (w1[0]); - wl[ 5] = swap_workaround (w1[1]); - wl[ 6] = swap_workaround (w1[2]); - wl[ 7] = swap_workaround (w1[3]); - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = 0; - wl[15] = out_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06100_a1.cu b/nv/m06100_a1.cu deleted file mode 100644 index a058d58..0000000 --- a/nv/m06100_a1.cu +++ /dev/null @@ -1,1725 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define R 10 - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ __constant__ u32 Ch[8][256] = -{ - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 
0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 
0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 
0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 
0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 
0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 
0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 
0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 
0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 
0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 
0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 
0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 
0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 
0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ __constant__ u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 
0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 
0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 
0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 
0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 
0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 
0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 
0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 
0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 
0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 
0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 
0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 
0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 
0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -__device__ static void whirlpool_transform (const u32x w[16], u32x dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32x Kh[8]; - u32x Kl[8]; - - Kh[0] = 0x300beec0; - Kl[0] = 0xaf902967; - Kh[1] = 0x28282828; - Kl[1] = 0x28282828; - Kh[2] = 0x28282828; - Kl[2] = 0x28282828; - Kh[3] = 0x28282828; - Kl[3] = 0x28282828; - Kh[4] = 0x28282828; - Kl[4] = 0x28282828; - Kh[5] = 0x28282828; - Kl[5] = 0x28282828; - Kh[6] = 0x28282828; - Kl[6] = 0x28282828; - Kh[7] = 0x28282828; - Kl[7] = 0x28282828; - - u32x stateh[8]; - u32x statel[8]; - - stateh[0] = w[ 0]; - statel[0] = w[ 1]; - stateh[1] = w[ 2]; - statel[1] = w[ 3]; - stateh[2] = w[ 4]; - 
statel[2] = w[ 5]; - stateh[3] = w[ 6]; - statel[3] = w[ 7]; - stateh[4] = w[ 8]; - statel[4] = w[ 9]; - stateh[5] = w[10]; - statel[5] = w[11]; - stateh[6] = w[12]; - statel[6] = w[13]; - stateh[7] = w[14]; - statel[7] = w[15]; - - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - - for (int r = 2; r <= R; r++) - { - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) 
& 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = 
Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] = stateh[0] ^ w[ 0]; - dgst[ 1] = statel[0] ^ w[ 1]; - dgst[ 2] = stateh[1] ^ w[ 2]; - dgst[ 3] = statel[1] ^ w[ 3]; - dgst[ 4] = stateh[2] ^ w[ 4]; - dgst[ 5] = statel[2] ^ w[ 5]; - dgst[ 6] = stateh[3] ^ w[ 6]; - dgst[ 7] = statel[3] ^ w[ 7]; - dgst[ 8] = stateh[4] ^ w[ 8]; - dgst[ 9] = statel[4] ^ w[ 9]; - dgst[10] = stateh[5] ^ w[10]; - dgst[11] = statel[5] ^ w[11]; - dgst[12] = stateh[6] ^ w[12]; - dgst[13] = statel[6] ^ w[13]; - dgst[14] = stateh[7] ^ w[14]; - dgst[15] = statel[7] ^ w[15]; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = 
pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * modifier - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | 
wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - u32x wl[16]; - - wl[ 0] = swap_workaround (w0[0]); - wl[ 1] = swap_workaround (w0[1]); - wl[ 2] = swap_workaround (w0[2]); - wl[ 3] = swap_workaround (w0[3]); - wl[ 4] = swap_workaround (w1[0]); - wl[ 5] = swap_workaround (w1[1]); - wl[ 6] = swap_workaround (w1[2]); - wl[ 7] = swap_workaround (w1[3]); - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = 0; - wl[15] = pw_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - 
- switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * modifier - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - u32x wl[16]; - - wl[ 0] = swap_workaround 
(w0[0]); - wl[ 1] = swap_workaround (w0[1]); - wl[ 2] = swap_workaround (w0[2]); - wl[ 3] = swap_workaround (w0[3]); - wl[ 4] = swap_workaround (w1[0]); - wl[ 5] = swap_workaround (w1[1]); - wl[ 6] = swap_workaround (w1[2]); - wl[ 7] = swap_workaround (w1[3]); - wl[ 8] = 0; - wl[ 9] = 0; - wl[10] = 0; - wl[11] = 0; - wl[12] = 0; - wl[13] = 0; - wl[14] = 0; - wl[15] = pw_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06100_a3.cu b/nv/m06100_a3.cu deleted file mode 100644 index 495f7d0..0000000 --- a/nv/m06100_a3.cu +++ /dev/null @@ -1,1836 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define R 10 - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,n,i) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -__device__ __constant__ u32 Ch[8][256] = -{ - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 
0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 
0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 
0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 
0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 
0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 
0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 
0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 
0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 
0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 
0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 
0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 
0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 
0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ __constant__ u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 
0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 
0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 
0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 
0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 
0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 
0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 
0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 
0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 
0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 
0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 
0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 
0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 
0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -__device__ static void whirlpool_transform (const u32x w[16], u32x dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32x Kh[8]; - u32x Kl[8]; - - Kh[0] = 0x300beec0; - Kl[0] = 0xaf902967; - Kh[1] = 0x28282828; - Kl[1] = 0x28282828; - Kh[2] = 0x28282828; - Kl[2] = 0x28282828; - Kh[3] = 0x28282828; - Kl[3] = 0x28282828; - Kh[4] = 0x28282828; - Kl[4] = 0x28282828; - Kh[5] = 0x28282828; - Kl[5] = 0x28282828; - Kh[6] = 0x28282828; - Kl[6] = 0x28282828; - Kh[7] = 0x28282828; - Kl[7] = 0x28282828; - - u32x stateh[8]; - u32x statel[8]; - - stateh[0] = w[ 0]; - statel[0] = w[ 1]; - stateh[1] = w[ 2]; - statel[1] = w[ 3]; - stateh[2] = w[ 4]; - statel[2] = w[ 5]; - stateh[3] = w[ 6]; - statel[3] = w[ 7]; - stateh[4] = w[ 8]; - statel[4] = w[ 9]; - stateh[5] = w[10]; - statel[5] = w[11]; - stateh[6] = w[12]; - 
statel[6] = w[13]; - stateh[7] = w[14]; - statel[7] = w[15]; - - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - - for (int r = 2; r <= R; r++) - { - u32x Lh[8]; - u32x Ll[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = Kh[(i + 8) & 7] >> 24; - const u32x Lp1 = Kh[(i + 7) & 7] >> 16; - const u32x Lp2 = Kh[(i + 6) & 7] >> 8; - const u32x Lp3 = Kh[(i + 5) & 7] >> 0; - const u32x Lp4 = Kl[(i + 4) & 7] >> 24; - const u32x Lp5 = Kl[(i + 3) & 7] >> 16; - const u32x Lp6 = Kl[(i + 2) & 7] >> 8; - const u32x Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ 
BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (int i = 0; i < 8; i++) - { - const u32x Lp0 = stateh[(i + 8) & 7] >> 24; - const u32x Lp1 = stateh[(i + 7) & 7] >> 16; - const u32x Lp2 = stateh[(i + 6) & 7] >> 8; - const u32x Lp3 = stateh[(i + 5) & 7] >> 0; - const u32x Lp4 = statel[(i + 4) & 7] >> 24; - const u32x Lp5 = statel[(i + 3) & 7] >> 16; - const u32x Lp6 = statel[(i + 2) & 7] >> 8; - const u32x Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - 
stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] = stateh[0] ^ w[ 0]; - dgst[ 1] = statel[0] ^ w[ 1]; - dgst[ 2] = stateh[1] ^ w[ 2]; - dgst[ 3] = statel[1] ^ w[ 3]; - dgst[ 4] = stateh[2] ^ w[ 4]; - dgst[ 5] = statel[2] ^ w[ 5]; - dgst[ 6] = stateh[3] ^ w[ 6]; - dgst[ 7] = statel[3] ^ w[ 7]; - dgst[ 8] = stateh[4] ^ w[ 8]; - dgst[ 9] = statel[4] ^ w[ 9]; - dgst[10] = stateh[5] ^ w[10]; - dgst[11] = statel[5] ^ w[11]; - dgst[12] = stateh[6] ^ w[12]; - dgst[13] = statel[6] ^ w[13]; - dgst[14] = stateh[7] ^ w[14]; - dgst[15] = statel[7] ^ w[15]; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ __shared__ u32 s_Ch[8][256]; -__device__ __shared__ u32 s_Cl[8][256]; - -__device__ static void m06100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = 
w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = w2[0]; - wl[ 9] = w2[1]; - wl[10] = w2[2]; - wl[11] = w2[3]; - wl[12] = w3[0]; - wl[13] = w3[1]; - wl[14] = 0; - wl[15] = pw_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m06100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x wl[16]; - - wl[ 0] = w0[0]; - wl[ 1] = w0[1]; - wl[ 2] = w0[2]; - wl[ 3] = w0[3]; - wl[ 4] = w1[0]; - wl[ 5] = w1[1]; - wl[ 6] = w1[2]; - wl[ 7] = w1[3]; - wl[ 8] = w2[0]; - wl[ 9] = w2[1]; - wl[10] = w2[2]; - wl[11] = w2[3]; - wl[12] = w3[0]; - wl[13] = 
w3[1]; - wl[14] = 0; - wl[15] = pw_len * 8; - - u32x dgst[16]; - - whirlpool_transform (wl, dgst, s_Ch, s_Cl); - - const u32x r0 = dgst[0]; - const u32x r1 = dgst[1]; - const u32x r2 = dgst[2]; - const u32x r3 = dgst[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * modifier - */ - - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m06211.cu b/nv/m06211.cu deleted file mode 100644 index 4933532..0000000 --- a/nv/m06211.cu +++ /dev/null @@ -1,717 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, 
RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[14], RIPEMD160C00, RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, 
c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - 
RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], 
RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, 
RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, 
e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP 
(RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - -__device__ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = 
ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w1, dgst); - ripemd160_transform (w2, dgst); - - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform (w, dgst); -} - -__device__ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w, dgst); - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform (w, dgst); -} - -__device__ static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[0] = RIPEMD160M_A; - ipad[1] = RIPEMD160M_B; - ipad[2] = RIPEMD160M_C; - ipad[3] = RIPEMD160M_D; - ipad[4] = RIPEMD160M_E; - - ripemd160_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 
0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[0] = RIPEMD160M_A; - opad[1] = RIPEMD160M_B; - opad[2] = RIPEMD160M_C; - opad[3] = RIPEMD160M_D; - opad[4] = RIPEMD160M_E; - - ripemd160_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06211_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x 
w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u32x salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf1[10] = 
esalt_bufs[salt_pos].salt_buf[10]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32x salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80; - salt_buf2[ 2] = 0; - salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = (64 + 64 + 4) * 8; - salt_buf2[15] = 0; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u32x ipad[5]; - u32x opad[5]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1) - { - salt_buf2[0] = swap_workaround (j); - - u32x dgst[5]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m06211_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) - { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - 
w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06211_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = tmps[gid].out[ 0]; - ukey1[1] = tmps[gid].out[ 1]; - ukey1[2] = tmps[gid].out[ 2]; - ukey1[3] = tmps[gid].out[ 3]; - ukey1[4] = tmps[gid].out[ 4]; - ukey1[5] = tmps[gid].out[ 5]; - ukey1[6] = tmps[gid].out[ 6]; - ukey1[7] = tmps[gid].out[ 7]; - - u32 ukey2[8]; - - ukey2[0] = tmps[gid].out[ 8]; - ukey2[1] = tmps[gid].out[ 9]; - ukey2[2] = tmps[gid].out[10]; - ukey2[3] = tmps[gid].out[11]; - ukey2[4] = tmps[gid].out[12]; - ukey2[5] = 
tmps[gid].out[13]; - ukey2[6] = tmps[gid].out[14]; - ukey2[7] = tmps[gid].out[15]; - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06212.cu b/nv/m06212.cu deleted file mode 100644 index 48f6eb5..0000000 --- a/nv/m06212.cu +++ /dev/null @@ -1,790 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ static void ripemd160_transform (const u32x w[16], u32x 
dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[14], RIPEMD160C00, RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP 
(RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, 
RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, 
c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], 
RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, 
b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - 
RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], 
RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - -__device__ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w1, dgst); - ripemd160_transform (w2, dgst); - - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform (w, dgst); -} - -__device__ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w, dgst); - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - 
dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform (w, dgst); -} - -__device__ static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[0] = RIPEMD160M_A; - ipad[1] = RIPEMD160M_B; - ipad[2] = RIPEMD160M_C; - ipad[3] = RIPEMD160M_D; - ipad[4] = RIPEMD160M_E; - - ripemd160_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[0] = RIPEMD160M_A; - opad[1] = RIPEMD160M_B; - opad[2] = RIPEMD160M_C; - opad[3] = RIPEMD160M_D; - opad[4] = RIPEMD160M_E; - - ripemd160_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06212_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], 
esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u32x salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32x salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80; - salt_buf2[ 2] = 0; - salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = (64 + 64 + 4) * 8; - salt_buf2[15] = 0; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u32x ipad[5]; - u32x opad[5]; - - hmac_init (w, ipad, opad); - - 
tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1) - { - salt_buf2[0] = swap_workaround (j); - - u32x dgst[5]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06212_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - 
ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) - { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06212_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = tmps[gid].out[ 0]; - ukey1[1] = tmps[gid].out[ 1]; - ukey1[2] = tmps[gid].out[ 2]; - ukey1[3] = tmps[gid].out[ 3]; - ukey1[4] = tmps[gid].out[ 4]; - ukey1[5] = tmps[gid].out[ 5]; - ukey1[6] = tmps[gid].out[ 6]; - ukey1[7] = tmps[gid].out[ 7]; - - u32 ukey2[8]; - - ukey2[0] = tmps[gid].out[ 8]; - ukey2[1] = tmps[gid].out[ 9]; - ukey2[2] = tmps[gid].out[10]; - ukey2[3] = tmps[gid].out[11]; - ukey2[4] = tmps[gid].out[12]; - ukey2[5] = tmps[gid].out[13]; - ukey2[6] = tmps[gid].out[14]; - ukey2[7] = tmps[gid].out[15]; - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] 
== 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = tmps[gid].out[16]; - ukey3[1] = tmps[gid].out[17]; - ukey3[2] = tmps[gid].out[18]; - ukey3[3] = tmps[gid].out[19]; - ukey3[4] = tmps[gid].out[20]; - ukey3[5] = tmps[gid].out[21]; - ukey3[6] = tmps[gid].out[22]; - ukey3[7] = tmps[gid].out[23]; - - u32 ukey4[8]; - - ukey4[0] = tmps[gid].out[24]; - ukey4[1] = tmps[gid].out[25]; - ukey4[2] = tmps[gid].out[26]; - ukey4[3] = tmps[gid].out[27]; - ukey4[4] = tmps[gid].out[28]; - ukey4[5] = tmps[gid].out[29]; - ukey4[6] = tmps[gid].out[30]; - ukey4[7] = tmps[gid].out[31]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06213.cu b/nv/m06213.cu deleted file mode 100644 index c31f9e2..0000000 --- a/nv/m06213.cu +++ /dev/null @@ -1,848 +0,0 
@@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RIPEMD160_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ static void ripemd160_transform (const u32x w[16], u32x dgst[5]) -{ - u32x a1 = dgst[0]; - u32x b1 = dgst[1]; - u32x c1 = dgst[2]; - u32x d1 = dgst[3]; - u32x e1 = dgst[4]; - - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C00, RIPEMD160S00); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 1], RIPEMD160C00, RIPEMD160S01); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 2], RIPEMD160C00, RIPEMD160S02); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 3], RIPEMD160C00, RIPEMD160S03); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C00, RIPEMD160S04); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[ 5], RIPEMD160C00, RIPEMD160S05); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[ 6], RIPEMD160C00, RIPEMD160S06); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[ 7], RIPEMD160C00, RIPEMD160S07); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C00, RIPEMD160S08); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[ 9], RIPEMD160C00, RIPEMD160S09); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[10], RIPEMD160C00, RIPEMD160S0A); - RIPEMD160_STEP (RIPEMD160_F , e1, a1, b1, c1, d1, w[11], RIPEMD160C00, RIPEMD160S0B); - RIPEMD160_STEP (RIPEMD160_F , d1, e1, a1, b1, c1, w[12], RIPEMD160C00, RIPEMD160S0C); - RIPEMD160_STEP (RIPEMD160_F , c1, d1, e1, a1, b1, w[13], RIPEMD160C00, RIPEMD160S0D); - RIPEMD160_STEP (RIPEMD160_F , b1, c1, d1, e1, a1, w[14], RIPEMD160C00, 
RIPEMD160S0E); - RIPEMD160_STEP (RIPEMD160_F , a1, b1, c1, d1, e1, w[15], RIPEMD160C00, RIPEMD160S0F); - - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 7], RIPEMD160C10, RIPEMD160S10); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 4], RIPEMD160C10, RIPEMD160S11); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[13], RIPEMD160C10, RIPEMD160S12); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[ 1], RIPEMD160C10, RIPEMD160S13); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[10], RIPEMD160C10, RIPEMD160S14); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 6], RIPEMD160C10, RIPEMD160S15); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[15], RIPEMD160C10, RIPEMD160S16); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 3], RIPEMD160C10, RIPEMD160S17); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[12], RIPEMD160C10, RIPEMD160S18); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[ 0], RIPEMD160C10, RIPEMD160S19); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 9], RIPEMD160C10, RIPEMD160S1A); - RIPEMD160_STEP (RIPEMD160_Go, d1, e1, a1, b1, c1, w[ 5], RIPEMD160C10, RIPEMD160S1B); - RIPEMD160_STEP (RIPEMD160_Go, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C10, RIPEMD160S1C); - RIPEMD160_STEP (RIPEMD160_Go, b1, c1, d1, e1, a1, w[14], RIPEMD160C10, RIPEMD160S1D); - RIPEMD160_STEP (RIPEMD160_Go, a1, b1, c1, d1, e1, w[11], RIPEMD160C10, RIPEMD160S1E); - RIPEMD160_STEP (RIPEMD160_Go, e1, a1, b1, c1, d1, w[ 8], RIPEMD160C10, RIPEMD160S1F); - - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 3], RIPEMD160C20, RIPEMD160S20); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[10], RIPEMD160C20, RIPEMD160S21); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[14], RIPEMD160C20, RIPEMD160S22); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 4], RIPEMD160C20, RIPEMD160S23); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 9], RIPEMD160C20, RIPEMD160S24); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, 
b1, c1, w[15], RIPEMD160C20, RIPEMD160S25); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 8], RIPEMD160C20, RIPEMD160S26); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[ 1], RIPEMD160C20, RIPEMD160S27); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C20, RIPEMD160S28); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 7], RIPEMD160C20, RIPEMD160S29); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[ 0], RIPEMD160C20, RIPEMD160S2A); - RIPEMD160_STEP (RIPEMD160_H , c1, d1, e1, a1, b1, w[ 6], RIPEMD160C20, RIPEMD160S2B); - RIPEMD160_STEP (RIPEMD160_H , b1, c1, d1, e1, a1, w[13], RIPEMD160C20, RIPEMD160S2C); - RIPEMD160_STEP (RIPEMD160_H , a1, b1, c1, d1, e1, w[11], RIPEMD160C20, RIPEMD160S2D); - RIPEMD160_STEP (RIPEMD160_H , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C20, RIPEMD160S2E); - RIPEMD160_STEP (RIPEMD160_H , d1, e1, a1, b1, c1, w[12], RIPEMD160C20, RIPEMD160S2F); - - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 1], RIPEMD160C30, RIPEMD160S30); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[ 9], RIPEMD160C30, RIPEMD160S31); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[11], RIPEMD160C30, RIPEMD160S32); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[10], RIPEMD160C30, RIPEMD160S33); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 0], RIPEMD160C30, RIPEMD160S34); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 8], RIPEMD160C30, RIPEMD160S35); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[12], RIPEMD160C30, RIPEMD160S36); - RIPEMD160_STEP (RIPEMD160_Io, a1, b1, c1, d1, e1, w[ 4], RIPEMD160C30, RIPEMD160S37); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[13], RIPEMD160C30, RIPEMD160S38); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 3], RIPEMD160C30, RIPEMD160S39); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 7], RIPEMD160C30, RIPEMD160S3A); - RIPEMD160_STEP (RIPEMD160_Io, b1, c1, d1, e1, a1, w[15], RIPEMD160C30, RIPEMD160S3B); - RIPEMD160_STEP 
(RIPEMD160_Io, a1, b1, c1, d1, e1, w[14], RIPEMD160C30, RIPEMD160S3C); - RIPEMD160_STEP (RIPEMD160_Io, e1, a1, b1, c1, d1, w[ 5], RIPEMD160C30, RIPEMD160S3D); - RIPEMD160_STEP (RIPEMD160_Io, d1, e1, a1, b1, c1, w[ 6], RIPEMD160C30, RIPEMD160S3E); - RIPEMD160_STEP (RIPEMD160_Io, c1, d1, e1, a1, b1, w[ 2], RIPEMD160C30, RIPEMD160S3F); - - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 4], RIPEMD160C40, RIPEMD160S40); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 0], RIPEMD160C40, RIPEMD160S41); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[ 5], RIPEMD160C40, RIPEMD160S42); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 9], RIPEMD160C40, RIPEMD160S43); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 7], RIPEMD160C40, RIPEMD160S44); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[12], RIPEMD160C40, RIPEMD160S45); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 2], RIPEMD160C40, RIPEMD160S46); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[10], RIPEMD160C40, RIPEMD160S47); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[14], RIPEMD160C40, RIPEMD160S48); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[ 1], RIPEMD160C40, RIPEMD160S49); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[ 3], RIPEMD160C40, RIPEMD160S4A); - RIPEMD160_STEP (RIPEMD160_J , a1, b1, c1, d1, e1, w[ 8], RIPEMD160C40, RIPEMD160S4B); - RIPEMD160_STEP (RIPEMD160_J , e1, a1, b1, c1, d1, w[11], RIPEMD160C40, RIPEMD160S4C); - RIPEMD160_STEP (RIPEMD160_J , d1, e1, a1, b1, c1, w[ 6], RIPEMD160C40, RIPEMD160S4D); - RIPEMD160_STEP (RIPEMD160_J , c1, d1, e1, a1, b1, w[15], RIPEMD160C40, RIPEMD160S4E); - RIPEMD160_STEP (RIPEMD160_J , b1, c1, d1, e1, a1, w[13], RIPEMD160C40, RIPEMD160S4F); - - u32x a2 = dgst[0]; - u32x b2 = dgst[1]; - u32x c2 = dgst[2]; - u32x d2 = dgst[3]; - u32x e2 = dgst[4]; - - //RIPEMD160_STEP_WORKAROUND_BUG (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, 
c2, d2, e2, w[ 5], RIPEMD160C50, RIPEMD160S50); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[14], RIPEMD160C50, RIPEMD160S51); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 7], RIPEMD160C50, RIPEMD160S52); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[ 0], RIPEMD160C50, RIPEMD160S53); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C50, RIPEMD160S54); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[ 2], RIPEMD160C50, RIPEMD160S55); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[11], RIPEMD160C50, RIPEMD160S56); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C50, RIPEMD160S57); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[13], RIPEMD160C50, RIPEMD160S58); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 6], RIPEMD160C50, RIPEMD160S59); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[15], RIPEMD160C50, RIPEMD160S5A); - RIPEMD160_STEP (RIPEMD160_J , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C50, RIPEMD160S5B); - RIPEMD160_STEP (RIPEMD160_J , d2, e2, a2, b2, c2, w[ 1], RIPEMD160C50, RIPEMD160S5C); - RIPEMD160_STEP (RIPEMD160_J , c2, d2, e2, a2, b2, w[10], RIPEMD160C50, RIPEMD160S5D); - RIPEMD160_STEP (RIPEMD160_J , b2, c2, d2, e2, a2, w[ 3], RIPEMD160C50, RIPEMD160S5E); - RIPEMD160_STEP (RIPEMD160_J , a2, b2, c2, d2, e2, w[12], RIPEMD160C50, RIPEMD160S5F); - - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 6], RIPEMD160C60, RIPEMD160S60); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[11], RIPEMD160C60, RIPEMD160S61); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 3], RIPEMD160C60, RIPEMD160S62); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 7], RIPEMD160C60, RIPEMD160S63); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C60, RIPEMD160S64); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[13], RIPEMD160C60, RIPEMD160S65); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[ 5], RIPEMD160C60, RIPEMD160S66); - 
RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[10], RIPEMD160C60, RIPEMD160S67); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[14], RIPEMD160C60, RIPEMD160S68); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[15], RIPEMD160C60, RIPEMD160S69); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 8], RIPEMD160C60, RIPEMD160S6A); - RIPEMD160_STEP (RIPEMD160_Io, d2, e2, a2, b2, c2, w[12], RIPEMD160C60, RIPEMD160S6B); - RIPEMD160_STEP (RIPEMD160_Io, c2, d2, e2, a2, b2, w[ 4], RIPEMD160C60, RIPEMD160S6C); - RIPEMD160_STEP (RIPEMD160_Io, b2, c2, d2, e2, a2, w[ 9], RIPEMD160C60, RIPEMD160S6D); - RIPEMD160_STEP (RIPEMD160_Io, a2, b2, c2, d2, e2, w[ 1], RIPEMD160C60, RIPEMD160S6E); - RIPEMD160_STEP (RIPEMD160_Io, e2, a2, b2, c2, d2, w[ 2], RIPEMD160C60, RIPEMD160S6F); - - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[15], RIPEMD160C70, RIPEMD160S70); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 5], RIPEMD160C70, RIPEMD160S71); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 1], RIPEMD160C70, RIPEMD160S72); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 3], RIPEMD160C70, RIPEMD160S73); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C70, RIPEMD160S74); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[14], RIPEMD160C70, RIPEMD160S75); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 6], RIPEMD160C70, RIPEMD160S76); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[ 9], RIPEMD160C70, RIPEMD160S77); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[11], RIPEMD160C70, RIPEMD160S78); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 8], RIPEMD160C70, RIPEMD160S79); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[12], RIPEMD160C70, RIPEMD160S7A); - RIPEMD160_STEP (RIPEMD160_H , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C70, RIPEMD160S7B); - RIPEMD160_STEP (RIPEMD160_H , b2, c2, d2, e2, a2, w[10], RIPEMD160C70, RIPEMD160S7C); - RIPEMD160_STEP (RIPEMD160_H , a2, b2, c2, d2, e2, w[ 0], 
RIPEMD160C70, RIPEMD160S7D); - RIPEMD160_STEP (RIPEMD160_H , e2, a2, b2, c2, d2, w[ 4], RIPEMD160C70, RIPEMD160S7E); - RIPEMD160_STEP (RIPEMD160_H , d2, e2, a2, b2, c2, w[13], RIPEMD160C70, RIPEMD160S7F); - - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 8], RIPEMD160C80, RIPEMD160S80); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[ 6], RIPEMD160C80, RIPEMD160S81); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 4], RIPEMD160C80, RIPEMD160S82); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 1], RIPEMD160C80, RIPEMD160S83); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[ 3], RIPEMD160C80, RIPEMD160S84); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[11], RIPEMD160C80, RIPEMD160S85); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[15], RIPEMD160C80, RIPEMD160S86); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 0], RIPEMD160C80, RIPEMD160S87); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 5], RIPEMD160C80, RIPEMD160S88); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[12], RIPEMD160C80, RIPEMD160S89); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[ 2], RIPEMD160C80, RIPEMD160S8A); - RIPEMD160_STEP (RIPEMD160_Go, b2, c2, d2, e2, a2, w[13], RIPEMD160C80, RIPEMD160S8B); - RIPEMD160_STEP (RIPEMD160_Go, a2, b2, c2, d2, e2, w[ 9], RIPEMD160C80, RIPEMD160S8C); - RIPEMD160_STEP (RIPEMD160_Go, e2, a2, b2, c2, d2, w[ 7], RIPEMD160C80, RIPEMD160S8D); - RIPEMD160_STEP (RIPEMD160_Go, d2, e2, a2, b2, c2, w[10], RIPEMD160C80, RIPEMD160S8E); - RIPEMD160_STEP (RIPEMD160_Go, c2, d2, e2, a2, b2, w[14], RIPEMD160C80, RIPEMD160S8F); - - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[12], RIPEMD160C90, RIPEMD160S90); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[15], RIPEMD160C90, RIPEMD160S91); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[10], RIPEMD160C90, RIPEMD160S92); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 4], RIPEMD160C90, RIPEMD160S93); - RIPEMD160_STEP (RIPEMD160_F 
, c2, d2, e2, a2, b2, w[ 1], RIPEMD160C90, RIPEMD160S94); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[ 5], RIPEMD160C90, RIPEMD160S95); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[ 8], RIPEMD160C90, RIPEMD160S96); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 7], RIPEMD160C90, RIPEMD160S97); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 6], RIPEMD160C90, RIPEMD160S98); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 2], RIPEMD160C90, RIPEMD160S99); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[13], RIPEMD160C90, RIPEMD160S9A); - RIPEMD160_STEP (RIPEMD160_F , a2, b2, c2, d2, e2, w[14], RIPEMD160C90, RIPEMD160S9B); - RIPEMD160_STEP (RIPEMD160_F , e2, a2, b2, c2, d2, w[ 0], RIPEMD160C90, RIPEMD160S9C); - RIPEMD160_STEP (RIPEMD160_F , d2, e2, a2, b2, c2, w[ 3], RIPEMD160C90, RIPEMD160S9D); - RIPEMD160_STEP (RIPEMD160_F , c2, d2, e2, a2, b2, w[ 9], RIPEMD160C90, RIPEMD160S9E); - RIPEMD160_STEP (RIPEMD160_F , b2, c2, d2, e2, a2, w[11], RIPEMD160C90, RIPEMD160S9F); - - const u32x a = dgst[1] + c1 + d2; - const u32x b = dgst[2] + d1 + e2; - const u32x c = dgst[3] + e1 + a2; - const u32x d = dgst[4] + a1 + b2; - const u32x e = dgst[0] + b1 + c2; - - dgst[0] = a; - dgst[1] = b; - dgst[2] = c; - dgst[3] = d; - dgst[4] = e; -} - -__device__ static void hmac_run2 (const u32x w1[16], const u32x w2[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w1, dgst); - ripemd160_transform (w2, dgst); - - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform 
(w, dgst); -} - -__device__ static void hmac_run (u32x w[16], const u32x ipad[5], const u32x opad[5], u32x dgst[5]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - - ripemd160_transform (w, dgst); - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - - ripemd160_transform (w, dgst); -} - -__device__ static void hmac_init (u32x w[16], u32x ipad[5], u32x opad[5]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[0] = RIPEMD160M_A; - ipad[1] = RIPEMD160M_B; - ipad[2] = RIPEMD160M_C; - ipad[3] = RIPEMD160M_D; - ipad[4] = RIPEMD160M_E; - - ripemd160_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[0] = RIPEMD160M_A; - opad[1] = RIPEMD160M_B; - opad[2] = RIPEMD160M_C; - opad[3] = RIPEMD160M_D; - opad[4] = RIPEMD160M_E; - - ripemd160_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a 
>> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06213_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], 
esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u32x salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf1[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf1[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32x salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80; - salt_buf2[ 2] = 0; - salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - 
salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = (64 + 64 + 4) * 8; - salt_buf2[15] = 0; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u32x ipad[5]; - u32x opad[5]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 5, j += 1) - { - salt_buf2[0] = swap_workaround (j); - - u32x dgst[5]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06213_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 5) - { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = 0x80; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = (64 + 20) * 8; - w[15] = 0; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - 
-extern "C" __global__ void __launch_bounds__ (256, 1) m06213_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = tmps[gid].out[ 0]; - ukey1[1] = tmps[gid].out[ 1]; - ukey1[2] = tmps[gid].out[ 2]; - ukey1[3] = tmps[gid].out[ 3]; - ukey1[4] = tmps[gid].out[ 4]; - ukey1[5] = tmps[gid].out[ 5]; - ukey1[6] = tmps[gid].out[ 6]; - ukey1[7] = tmps[gid].out[ 7]; - - u32 ukey2[8]; - - ukey2[0] = tmps[gid].out[ 8]; - ukey2[1] = tmps[gid].out[ 9]; - ukey2[2] = tmps[gid].out[10]; - ukey2[3] = tmps[gid].out[11]; - ukey2[4] = tmps[gid].out[12]; - ukey2[5] = tmps[gid].out[13]; - ukey2[6] = tmps[gid].out[14]; - ukey2[7] = tmps[gid].out[15]; - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, 
gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = tmps[gid].out[16]; - ukey3[1] = tmps[gid].out[17]; - ukey3[2] = tmps[gid].out[18]; - ukey3[3] = tmps[gid].out[19]; - ukey3[4] = tmps[gid].out[20]; - ukey3[5] = tmps[gid].out[21]; - ukey3[6] = tmps[gid].out[22]; - ukey3[7] = tmps[gid].out[23]; - - u32 ukey4[8]; - - ukey4[0] = tmps[gid].out[24]; - ukey4[1] = tmps[gid].out[25]; - ukey4[2] = tmps[gid].out[26]; - ukey4[3] = tmps[gid].out[27]; - ukey4[4] = tmps[gid].out[28]; - ukey4[5] = tmps[gid].out[29]; - ukey4[6] = tmps[gid].out[30]; - ukey4[7] = tmps[gid].out[31]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); 
- - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey5[8]; - - ukey5[0] = tmps[gid].out[32]; - ukey5[1] = tmps[gid].out[33]; - ukey5[2] = tmps[gid].out[34]; - ukey5[3] = tmps[gid].out[35]; - ukey5[4] = tmps[gid].out[36]; - ukey5[5] = tmps[gid].out[37]; - ukey5[6] = tmps[gid].out[38]; - ukey5[7] = tmps[gid].out[39]; - - u32 ukey6[8]; - - ukey6[0] = tmps[gid].out[40]; - ukey6[1] = tmps[gid].out[41]; - ukey6[2] = tmps[gid].out[42]; - ukey6[3] = tmps[gid].out[43]; - ukey6[4] = tmps[gid].out[44]; - ukey6[5] = tmps[gid].out[45]; - ukey6[6] = tmps[gid].out[46]; - ukey6[7] = tmps[gid].out[47]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06221.cu b/nv/m06221.cu deleted file mode 100644 index 6ae1c90..0000000 --- a/nv/m06221.cu +++ /dev/null @@ -1,621 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - 
u64 wf_t = w[15]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP 
(SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_run (const u64 w1[16], const u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_init (u64 w[16], u64 ipad[8], u64 opad[8]) -{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; - w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; 
- ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06221_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u64 salt_buf[16]; - - // swap 
fehlt - - salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - salt_buf[ 8] = 0; - salt_buf[ 9] = 0; - salt_buf[10] = 0; - salt_buf[11] = 0; - salt_buf[12] = 0; - salt_buf[13] = 0; - salt_buf[14] = 0; - salt_buf[15] = (128 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u64 w[16]; - - w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]); - w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]); - w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]); - w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]); - w[ 7] = ((u64) 
swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 8); i += 8, j += 1) - { - salt_buf[8] = (u64) j << 32 | (u64) 0x80000000; - - u64 dgst[8]; - - hmac_run (salt_buf, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06221_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 8); i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - 
hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06221_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0])); - ukey1[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 0])); - ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1])); - ukey1[3] = swap_workaround (l32_from_64 
(tmps[gid].out[ 1])); - ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2])); - ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2])); - ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3])); - ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3])); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4])); - ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4])); - ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5])); - ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5])); - ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6])); - ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6])); - ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7])); - ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7])); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - 
} - } -} diff --git a/nv/m06222.cu b/nv/m06222.cu deleted file mode 100644 index 8daf916..0000000 --- a/nv/m06222.cu +++ /dev/null @@ -1,694 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 
w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, 
a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_run (const u64 w1[16], const u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_init (u64 w[16], u64 ipad[8], u64 opad[8]) -{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; 
- w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06222_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const 
u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], 
esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u64 salt_buf[16]; - - // swap fehlt - - salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - salt_buf[ 8] = 0; - salt_buf[ 9] = 0; - salt_buf[10] = 0; - salt_buf[11] = 0; - salt_buf[12] = 0; - salt_buf[13] = 0; - salt_buf[14] = 0; - salt_buf[15] = (128 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u64 w[16]; - - w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]); - w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]); - w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]); - w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[ 5] = ((u64) 
swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]); - w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 8); i += 8, j += 1) - { - salt_buf[8] = (u64) j << 32 | (u64) 0x80000000; - - u64 dgst[8]; - - hmac_run (salt_buf, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06222_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 8); i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = 
dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06222_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0])); - ukey1[1] = 
swap_workaround (l32_from_64 (tmps[gid].out[ 0])); - ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1])); - ukey1[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 1])); - ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2])); - ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2])); - ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3])); - ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3])); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4])); - ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4])); - ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5])); - ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5])); - ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6])); - ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6])); - ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7])); - ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7])); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && 
(tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 8])); - ukey3[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 8])); - ukey3[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 9])); - ukey3[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 9])); - ukey3[4] = swap_workaround (h32_from_64 (tmps[gid].out[10])); - ukey3[5] = swap_workaround (l32_from_64 (tmps[gid].out[10])); - ukey3[6] = swap_workaround (h32_from_64 (tmps[gid].out[11])); - ukey3[7] = swap_workaround (l32_from_64 (tmps[gid].out[11])); - - u32 ukey4[8]; - - ukey4[0] = swap_workaround (h32_from_64 (tmps[gid].out[12])); - ukey4[1] = swap_workaround (l32_from_64 (tmps[gid].out[12])); - ukey4[2] = swap_workaround (h32_from_64 (tmps[gid].out[13])); - ukey4[3] = swap_workaround (l32_from_64 (tmps[gid].out[13])); - ukey4[4] = swap_workaround (h32_from_64 (tmps[gid].out[14])); - ukey4[5] = swap_workaround (l32_from_64 (tmps[gid].out[14])); - ukey4[6] = swap_workaround (h32_from_64 (tmps[gid].out[15])); - ukey4[7] = swap_workaround (l32_from_64 (tmps[gid].out[15])); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - 
tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06223.cu b/nv/m06223.cu deleted file mode 100644 index b2b9093..0000000 --- a/nv/m06223.cu +++ /dev/null @@ -1,752 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - 
-__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, 
SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_run (const u64 w1[16], const u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_init (u64 w[16], u64 ipad[8], u64 opad[8]) 
-{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; - w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} 
- -extern "C" __global__ void __launch_bounds__ (256, 1) m06223_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], 
esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * salt - */ - - u64 salt_buf[16]; - - // swap fehlt - - salt_buf[ 0] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf[ 1] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf[ 2] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf[ 3] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf[ 4] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf[ 5] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[10])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf[ 6] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[12])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf[ 7] = ((u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[14])) << 32 | (u64) swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - salt_buf[ 8] = 0; - salt_buf[ 9] = 0; - salt_buf[10] = 0; - salt_buf[11] = 0; - salt_buf[12] = 0; - salt_buf[13] = 0; - salt_buf[14] = 0; - salt_buf[15] = (128 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u64 w[16]; - - w[ 0] = ((u64) swap_workaround (w0[0])) << 32 
| (u64) swap_workaround (w0[1]); - w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]); - w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]); - w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]); - w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 8); i += 8, j += 1) - { - salt_buf[8] = (u64) j << 32 | (u64) 0x80000000; - - u64 dgst[8]; - - hmac_run (salt_buf, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] 
= dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06223_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 8); i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = 
tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06223_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc64_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const 
u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 0])); - ukey1[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 0])); - ukey1[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 1])); - ukey1[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 1])); - ukey1[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 2])); - ukey1[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 2])); - ukey1[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 3])); - ukey1[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 3])); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 4])); - ukey2[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 4])); - ukey2[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 5])); - ukey2[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 5])); - ukey2[4] = swap_workaround (h32_from_64 (tmps[gid].out[ 6])); - ukey2[5] = swap_workaround (l32_from_64 (tmps[gid].out[ 6])); - ukey2[6] = swap_workaround (h32_from_64 (tmps[gid].out[ 7])); - ukey2[7] = swap_workaround (l32_from_64 (tmps[gid].out[ 7])); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - 
- if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = swap_workaround (h32_from_64 (tmps[gid].out[ 8])); - ukey3[1] = swap_workaround (l32_from_64 (tmps[gid].out[ 8])); - ukey3[2] = swap_workaround (h32_from_64 (tmps[gid].out[ 9])); - ukey3[3] = swap_workaround (l32_from_64 (tmps[gid].out[ 9])); - ukey3[4] = swap_workaround (h32_from_64 (tmps[gid].out[10])); - ukey3[5] = swap_workaround (l32_from_64 (tmps[gid].out[10])); - ukey3[6] = swap_workaround (h32_from_64 (tmps[gid].out[11])); - ukey3[7] = swap_workaround (l32_from_64 (tmps[gid].out[11])); - - u32 ukey4[8]; - - ukey4[0] = swap_workaround (h32_from_64 (tmps[gid].out[12])); - ukey4[1] = swap_workaround (l32_from_64 (tmps[gid].out[12])); - ukey4[2] = swap_workaround (h32_from_64 (tmps[gid].out[13])); - ukey4[3] = swap_workaround (l32_from_64 (tmps[gid].out[13])); - ukey4[4] = swap_workaround (h32_from_64 (tmps[gid].out[14])); - ukey4[5] = swap_workaround (l32_from_64 (tmps[gid].out[14])); - ukey4[6] = swap_workaround (h32_from_64 (tmps[gid].out[15])); - ukey4[7] = swap_workaround (l32_from_64 (tmps[gid].out[15])); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = 
data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey5[8]; - - ukey5[0] = swap_workaround (h32_from_64 (tmps[gid].out[16])); - ukey5[1] = swap_workaround (l32_from_64 (tmps[gid].out[16])); - ukey5[2] = swap_workaround (h32_from_64 (tmps[gid].out[17])); - ukey5[3] = swap_workaround (l32_from_64 (tmps[gid].out[17])); - ukey5[4] = swap_workaround (h32_from_64 (tmps[gid].out[18])); - ukey5[5] = swap_workaround (l32_from_64 (tmps[gid].out[18])); - ukey5[6] = swap_workaround (h32_from_64 (tmps[gid].out[19])); - ukey5[7] = swap_workaround (l32_from_64 (tmps[gid].out[19])); - - u32 ukey6[8]; - - ukey6[0] = swap_workaround (h32_from_64 (tmps[gid].out[20])); - ukey6[1] = swap_workaround (l32_from_64 (tmps[gid].out[20])); - ukey6[2] = swap_workaround (h32_from_64 (tmps[gid].out[21])); - ukey6[3] = swap_workaround (l32_from_64 (tmps[gid].out[21])); - ukey6[4] = swap_workaround (h32_from_64 (tmps[gid].out[22])); - ukey6[5] = swap_workaround (l32_from_64 (tmps[gid].out[22])); - ukey6[6] = swap_workaround (h32_from_64 (tmps[gid].out[23])); - ukey6[7] = swap_workaround (l32_from_64 (tmps[gid].out[23])); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, 
tmp); - serpent256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06231.cu b/nv/m06231.cu deleted file mode 100644 index ac27d12..0000000 --- a/nv/m06231.cu +++ /dev/null @@ -1,1990 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -#define R 10 - -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) - -__device__ __constant__ u32 Ch[8][256] = -{ - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 
0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 
0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 
0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 
0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 
0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 
0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 
0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 
0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 
0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 
0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 
0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 
0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 
0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ __constant__ u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 
0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 
0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 
0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 
0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 
0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 
0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 
0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 
0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 
0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 
0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 
0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 
0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 
0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -typedef unsigned char uchar; - -__device__ static void whirlpool_transform (const u32 w[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32 Kh[8]; - u32 Kl[8]; - - Kh[0] = dgst[ 0]; - Kl[0] = dgst[ 1]; - Kh[1] = dgst[ 2]; - Kl[1] = dgst[ 3]; - Kh[2] = dgst[ 4]; - Kl[2] = dgst[ 5]; - Kh[3] = dgst[ 6]; - Kl[3] = dgst[ 7]; - Kh[4] = dgst[ 8]; - Kl[4] = dgst[ 9]; - Kh[5] = dgst[10]; - Kl[5] = dgst[11]; - Kh[6] = dgst[12]; - Kl[6] = dgst[13]; - Kh[7] = dgst[14]; - Kl[7] = dgst[15]; - - u32 stateh[8]; - u32 statel[8]; - - stateh[0] = w[ 0] ^ Kh[0]; - statel[0] = w[ 1] ^ Kl[0]; - stateh[1] = w[ 2] ^ Kh[1]; - 
statel[1] = w[ 3] ^ Kl[1]; - stateh[2] = w[ 4] ^ Kh[2]; - statel[2] = w[ 5] ^ Kl[2]; - stateh[3] = w[ 6] ^ Kh[3]; - statel[3] = w[ 7] ^ Kl[3]; - stateh[4] = w[ 8] ^ Kh[4]; - statel[4] = w[ 9] ^ Kl[4]; - stateh[5] = w[10] ^ Kh[5]; - statel[5] = w[11] ^ Kl[5]; - stateh[6] = w[12] ^ Kh[6]; - statel[6] = w[13] ^ Kl[6]; - stateh[7] = w[14] ^ Kh[7]; - statel[7] = w[15] ^ Kl[7]; - - u32 r; - - for (r = 1; r <= R; r++) - { - u32 Lh[8]; - u32 Ll[8]; - - u32 i; - - #pragma unroll 8 - for (i = 0; i < 8; i++) - { - const u8 Lp0 = Kh[(i + 8) & 7] >> 24; - const u8 Lp1 = Kh[(i + 7) & 7] >> 16; - const u8 Lp2 = Kh[(i + 6) & 7] >> 8; - const u8 Lp3 = Kh[(i + 5) & 7] >> 0; - const u8 Lp4 = Kl[(i + 4) & 7] >> 24; - const u8 Lp5 = Kl[(i + 3) & 7] >> 16; - const u8 Lp6 = Kl[(i + 2) & 7] >> 8; - const u8 Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (i = 0; i < 8; i++) - { - const u8 Lp0 = stateh[(i + 8) & 7] >> 24; - const u8 Lp1 = stateh[(i + 7) & 7] >> 16; - const u8 Lp2 = stateh[(i + 6) & 7] >> 8; - const u8 Lp3 = stateh[(i + 5) & 7] >> 0; - const u8 Lp4 = statel[(i + 4) & 7] >> 24; - const u8 Lp5 = statel[(i + 3) & 7] >> 16; - const u8 Lp6 = statel[(i + 2) & 7] >> 8; - const u8 Lp7 = statel[(i 
+ 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] ^= stateh[0] ^ w[ 0]; - dgst[ 1] ^= statel[0] ^ w[ 1]; - dgst[ 2] ^= stateh[1] ^ w[ 2]; - dgst[ 3] ^= statel[1] ^ w[ 3]; - dgst[ 4] ^= stateh[2] ^ w[ 4]; - dgst[ 5] ^= statel[2] ^ w[ 5]; - dgst[ 6] ^= stateh[3] ^ w[ 6]; - dgst[ 7] ^= statel[3] ^ w[ 7]; - dgst[ 8] ^= stateh[4] ^ w[ 8]; - dgst[ 9] ^= statel[4] ^ w[ 9]; - dgst[10] ^= stateh[5] ^ w[10]; - dgst[11] ^= statel[5] ^ w[11]; - dgst[12] ^= stateh[6] ^ w[12]; - dgst[13] ^= statel[6] ^ w[13]; - dgst[14] ^= stateh[7] ^ w[14]; - dgst[15] ^= statel[7] ^ w[15]; -} - -__device__ static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[16], const u32 opad[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - dgst[ 0] = ipad[ 0]; - dgst[ 1] = ipad[ 1]; - dgst[ 2] = ipad[ 2]; - dgst[ 3] = ipad[ 3]; - dgst[ 4] = ipad[ 4]; - dgst[ 5] = ipad[ 5]; - dgst[ 6] = ipad[ 6]; - dgst[ 7] = ipad[ 7]; - dgst[ 8] = ipad[ 8]; - dgst[ 9] = ipad[ 9]; - dgst[10] = ipad[10]; - dgst[11] = ipad[11]; - dgst[12] = ipad[12]; - dgst[13] = 
ipad[13]; - dgst[14] = ipad[14]; - dgst[15] = ipad[15]; - - whirlpool_transform (w1, dgst, s_Ch, s_Cl); - whirlpool_transform (w2, dgst, s_Ch, s_Cl); - - u32 w[16]; - - w[ 0] = dgst[ 0]; - w[ 1] = dgst[ 1]; - w[ 2] = dgst[ 2]; - w[ 3] = dgst[ 3]; - w[ 4] = dgst[ 4]; - w[ 5] = dgst[ 5]; - w[ 6] = dgst[ 6]; - w[ 7] = dgst[ 7]; - w[ 8] = dgst[ 8]; - w[ 9] = dgst[ 9]; - w[10] = dgst[10]; - w[11] = dgst[11]; - w[12] = dgst[12]; - w[13] = dgst[13]; - w[14] = dgst[14]; - w[15] = dgst[15]; - - dgst[ 0] = opad[ 0]; - dgst[ 1] = opad[ 1]; - dgst[ 2] = opad[ 2]; - dgst[ 3] = opad[ 3]; - dgst[ 4] = opad[ 4]; - dgst[ 5] = opad[ 5]; - dgst[ 6] = opad[ 6]; - dgst[ 7] = opad[ 7]; - dgst[ 8] = opad[ 8]; - dgst[ 9] = opad[ 9]; - dgst[10] = opad[10]; - dgst[11] = opad[11]; - dgst[12] = opad[12]; - dgst[13] = opad[13]; - dgst[14] = opad[14]; - dgst[15] = opad[15]; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); - - w[ 0] = 0x80000000; - w[ 1] = 0; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (64 + 64) * 8; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); -} - -__device__ static void hmac_init (u32 w[16], u32 ipad[16], u32 opad[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[ 0] = 0; - ipad[ 1] = 0; - ipad[ 2] = 0; - ipad[ 3] = 0; - ipad[ 4] = 0; - ipad[ 5] = 0; - ipad[ 6] = 0; - ipad[ 7] = 0; - ipad[ 8] = 0; - ipad[ 9] = 0; - ipad[10] = 0; - ipad[11] = 0; - ipad[12] = 0; - ipad[13] = 0; - ipad[14] = 0; - ipad[15] = 0; - - whirlpool_transform (w, ipad, s_Ch, s_Cl); - - w[ 0] ^= 0x6a6a6a6a; - 
w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[ 0] = 0; - opad[ 1] = 0; - opad[ 2] = 0; - opad[ 3] = 0; - opad[ 4] = 0; - opad[ 5] = 0; - opad[ 6] = 0; - opad[ 7] = 0; - opad[ 8] = 0; - opad[ 9] = 0; - opad[10] = 0; - opad[11] = 0; - opad[12] = 0; - opad[13] = 0; - opad[14] = 0; - opad[15] = 0; - - whirlpool_transform (w, opad, s_Ch, s_Cl); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06231_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * shared mem - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf1[16]; - - 
salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]); - salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]); - salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[ 6] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]); - salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]); - salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]); - salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]); - salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - - u32 salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80000000; - salt_buf2[ 2] = 0; - salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = 0; - salt_buf2[15] = (64 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32 w[16]; - - w[ 0] = swap_workaround (w0[0]); - w[ 1] = swap_workaround (w0[1]); - w[ 2] = swap_workaround (w0[2]); - w[ 3] = swap_workaround (w0[3]); - w[ 4] = swap_workaround (w1[0]); - w[ 5] = swap_workaround (w1[1]); - w[ 6] = swap_workaround (w1[2]); - w[ 7] = swap_workaround (w1[3]); - w[ 8] = swap_workaround (w2[0]); - w[ 9] = swap_workaround (w2[1]); - w[10] = swap_workaround (w2[2]); - w[11] = 
swap_workaround (w2[3]); - w[12] = swap_workaround (w3[0]); - w[13] = swap_workaround (w3[1]); - w[14] = swap_workaround (w3[2]); - w[15] = swap_workaround (w3[3]); - - u32 ipad[16]; - u32 opad[16]; - - hmac_init (w, ipad, opad, s_Ch, s_Cl); - - tmps[gid].ipad[ 0] = ipad[ 0]; - tmps[gid].ipad[ 1] = ipad[ 1]; - tmps[gid].ipad[ 2] = ipad[ 2]; - tmps[gid].ipad[ 3] = ipad[ 3]; - tmps[gid].ipad[ 4] = ipad[ 4]; - tmps[gid].ipad[ 5] = ipad[ 5]; - tmps[gid].ipad[ 6] = ipad[ 6]; - tmps[gid].ipad[ 7] = ipad[ 7]; - tmps[gid].ipad[ 8] = ipad[ 8]; - tmps[gid].ipad[ 9] = ipad[ 9]; - tmps[gid].ipad[10] = ipad[10]; - tmps[gid].ipad[11] = ipad[11]; - tmps[gid].ipad[12] = ipad[12]; - tmps[gid].ipad[13] = ipad[13]; - tmps[gid].ipad[14] = ipad[14]; - tmps[gid].ipad[15] = ipad[15]; - - tmps[gid].opad[ 0] = opad[ 0]; - tmps[gid].opad[ 1] = opad[ 1]; - tmps[gid].opad[ 2] = opad[ 2]; - tmps[gid].opad[ 3] = opad[ 3]; - tmps[gid].opad[ 4] = opad[ 4]; - tmps[gid].opad[ 5] = opad[ 5]; - tmps[gid].opad[ 6] = opad[ 6]; - tmps[gid].opad[ 7] = opad[ 7]; - tmps[gid].opad[ 8] = opad[ 8]; - tmps[gid].opad[ 9] = opad[ 9]; - tmps[gid].opad[10] = opad[10]; - tmps[gid].opad[11] = opad[11]; - tmps[gid].opad[12] = opad[12]; - tmps[gid].opad[13] = opad[13]; - tmps[gid].opad[14] = opad[14]; - tmps[gid].opad[15] = opad[15]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 16, j += 1) - { - salt_buf2[0] = j; - - u32 dgst[16]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst, s_Ch, s_Cl); - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - 
tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = dgst[ 0]; - tmps[gid].out[i + 1] = dgst[ 1]; - tmps[gid].out[i + 2] = dgst[ 2]; - tmps[gid].out[i + 3] = dgst[ 3]; - tmps[gid].out[i + 4] = dgst[ 4]; - tmps[gid].out[i + 5] = dgst[ 5]; - tmps[gid].out[i + 6] = dgst[ 6]; - tmps[gid].out[i + 7] = dgst[ 7]; - tmps[gid].out[i + 8] = dgst[ 8]; - tmps[gid].out[i + 9] = dgst[ 9]; - tmps[gid].out[i + 10] = dgst[10]; - tmps[gid].out[i + 11] = dgst[11]; - tmps[gid].out[i + 12] = dgst[12]; - tmps[gid].out[i + 13] = dgst[13]; - tmps[gid].out[i + 14] = dgst[14]; - tmps[gid].out[i + 15] = dgst[15]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06231_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - u32 ipad[16]; - - ipad[ 0] = tmps[gid].ipad[ 0]; - ipad[ 1] = tmps[gid].ipad[ 1], - ipad[ 2] = tmps[gid].ipad[ 2]; - ipad[ 3] = 
tmps[gid].ipad[ 3]; - ipad[ 4] = tmps[gid].ipad[ 4]; - ipad[ 5] = tmps[gid].ipad[ 5]; - ipad[ 6] = tmps[gid].ipad[ 6], - ipad[ 7] = tmps[gid].ipad[ 7]; - ipad[ 8] = tmps[gid].ipad[ 8]; - ipad[ 9] = tmps[gid].ipad[ 9]; - ipad[10] = tmps[gid].ipad[10]; - ipad[11] = tmps[gid].ipad[11], - ipad[12] = tmps[gid].ipad[12]; - ipad[13] = tmps[gid].ipad[13]; - ipad[14] = tmps[gid].ipad[14]; - ipad[15] = tmps[gid].ipad[15]; - - u32 opad[16]; - - opad[ 0] = tmps[gid].opad[ 0]; - opad[ 1] = tmps[gid].opad[ 1], - opad[ 2] = tmps[gid].opad[ 2]; - opad[ 3] = tmps[gid].opad[ 3]; - opad[ 4] = tmps[gid].opad[ 4]; - opad[ 5] = tmps[gid].opad[ 5]; - opad[ 6] = tmps[gid].opad[ 6], - opad[ 7] = tmps[gid].opad[ 7]; - opad[ 8] = tmps[gid].opad[ 8]; - opad[ 9] = tmps[gid].opad[ 9]; - opad[10] = tmps[gid].opad[10]; - opad[11] = tmps[gid].opad[11], - opad[12] = tmps[gid].opad[12]; - opad[13] = tmps[gid].opad[13]; - opad[14] = tmps[gid].opad[14]; - opad[15] = tmps[gid].opad[15]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 16) - { - u32 dgst[16]; - - dgst[ 0] = tmps[gid].dgst[i + 0]; - dgst[ 1] = tmps[gid].dgst[i + 1]; - dgst[ 2] = tmps[gid].dgst[i + 2]; - dgst[ 3] = tmps[gid].dgst[i + 3]; - dgst[ 4] = tmps[gid].dgst[i + 4]; - dgst[ 5] = tmps[gid].dgst[i + 5]; - dgst[ 6] = tmps[gid].dgst[i + 6]; - dgst[ 7] = tmps[gid].dgst[i + 7]; - dgst[ 8] = tmps[gid].dgst[i + 8]; - dgst[ 9] = tmps[gid].dgst[i + 9]; - dgst[10] = tmps[gid].dgst[i + 10]; - dgst[11] = tmps[gid].dgst[i + 11]; - dgst[12] = tmps[gid].dgst[i + 12]; - dgst[13] = tmps[gid].dgst[i + 13]; - dgst[14] = tmps[gid].dgst[i + 14]; - dgst[15] = tmps[gid].dgst[i + 15]; - - u32 out[16]; - - out[ 0] = tmps[gid].out[i + 0]; - out[ 1] = tmps[gid].out[i + 1]; - out[ 2] = tmps[gid].out[i + 2]; - out[ 3] = tmps[gid].out[i + 3]; - out[ 4] = tmps[gid].out[i + 4]; - out[ 5] = tmps[gid].out[i + 5]; - out[ 6] = tmps[gid].out[i + 6]; - out[ 7] = tmps[gid].out[i + 7]; - out[ 8] = tmps[gid].out[i + 8]; - out[ 9] = tmps[gid].out[i + 9]; - out[10] = 
tmps[gid].out[i + 10]; - out[11] = tmps[gid].out[i + 11]; - out[12] = tmps[gid].out[i + 12]; - out[13] = tmps[gid].out[i + 13]; - out[14] = tmps[gid].out[i + 14]; - out[15] = tmps[gid].out[i + 15]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w1[16]; - - w1[ 0] = dgst[ 0]; - w1[ 1] = dgst[ 1]; - w1[ 2] = dgst[ 2]; - w1[ 3] = dgst[ 3]; - w1[ 4] = dgst[ 4]; - w1[ 5] = dgst[ 5]; - w1[ 6] = dgst[ 6]; - w1[ 7] = dgst[ 7]; - w1[ 8] = dgst[ 8]; - w1[ 9] = dgst[ 9]; - w1[10] = dgst[10]; - w1[11] = dgst[11]; - w1[12] = dgst[12]; - w1[13] = dgst[13]; - w1[14] = dgst[14]; - w1[15] = dgst[15]; - - u32 w2[16]; - - w2[ 0] = 0x80000000; - w2[ 1] = 0; - w2[ 2] = 0; - w2[ 3] = 0; - w2[ 4] = 0; - w2[ 5] = 0; - w2[ 6] = 0; - w2[ 7] = 0; - w2[ 8] = 0; - w2[ 9] = 0; - w2[10] = 0; - w2[11] = 0; - w2[12] = 0; - w2[13] = 0; - w2[14] = 0; - w2[15] = (64 + 64) * 8; - - hmac_run2 (w1, w2, ipad, opad, dgst, s_Ch, s_Cl); - - out[ 0] ^= dgst[ 0]; - out[ 1] ^= dgst[ 1]; - out[ 2] ^= dgst[ 2]; - out[ 3] ^= dgst[ 3]; - out[ 4] ^= dgst[ 4]; - out[ 5] ^= dgst[ 5]; - out[ 6] ^= dgst[ 6]; - out[ 7] ^= dgst[ 7]; - out[ 8] ^= dgst[ 8]; - out[ 9] ^= dgst[ 9]; - out[10] ^= dgst[10]; - out[11] ^= dgst[11]; - out[12] ^= dgst[12]; - out[13] ^= dgst[13]; - out[14] ^= dgst[14]; - out[15] ^= dgst[15]; - } - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = out[ 0]; - tmps[gid].out[i + 1] = out[ 1]; - tmps[gid].out[i + 2] = out[ 2]; - tmps[gid].out[i + 3] = 
out[ 3]; - tmps[gid].out[i + 4] = out[ 4]; - tmps[gid].out[i + 5] = out[ 5]; - tmps[gid].out[i + 6] = out[ 6]; - tmps[gid].out[i + 7] = out[ 7]; - tmps[gid].out[i + 8] = out[ 8]; - tmps[gid].out[i + 9] = out[ 9]; - tmps[gid].out[i + 10] = out[10]; - tmps[gid].out[i + 11] = out[11]; - tmps[gid].out[i + 12] = out[12]; - tmps[gid].out[i + 13] = out[13]; - tmps[gid].out[i + 14] = out[14]; - tmps[gid].out[i + 15] = out[15]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06231_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (tmps[gid].out[ 0]); - ukey1[1] = swap_workaround (tmps[gid].out[ 1]); - ukey1[2] = swap_workaround (tmps[gid].out[ 2]); - ukey1[3] = swap_workaround (tmps[gid].out[ 3]); - ukey1[4] = swap_workaround (tmps[gid].out[ 4]); - ukey1[5] = swap_workaround (tmps[gid].out[ 5]); - ukey1[6] = swap_workaround (tmps[gid].out[ 6]); - ukey1[7] = swap_workaround (tmps[gid].out[ 7]); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (tmps[gid].out[ 8]); - ukey2[1] = swap_workaround (tmps[gid].out[ 9]); - ukey2[2] = swap_workaround (tmps[gid].out[10]); - 
ukey2[3] = swap_workaround (tmps[gid].out[11]); - ukey2[4] = swap_workaround (tmps[gid].out[12]); - ukey2[5] = swap_workaround (tmps[gid].out[13]); - ukey2[6] = swap_workaround (tmps[gid].out[14]); - ukey2[7] = swap_workaround (tmps[gid].out[15]); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06232.cu b/nv/m06232.cu deleted file mode 100644 index 10256ea..0000000 --- a/nv/m06232.cu +++ /dev/null @@ -1,2063 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - 
-#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -#define R 10 - -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) - -__device__ __constant__ u32 Ch[8][256] = -{ - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 
0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 
0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 
0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 
0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 
0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 
0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 
0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 
0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 
0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 
0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 
0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 
0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ __constant__ u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 
0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 
0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 
0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 
0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 
0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 
0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 
0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 
0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 
0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 
0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 
0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 
0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 
0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 
0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -typedef unsigned char uchar; - -__device__ static void whirlpool_transform (const u32 w[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32 Kh[8]; - u32 Kl[8]; - - Kh[0] = dgst[ 0]; - Kl[0] = dgst[ 1]; - Kh[1] = dgst[ 2]; - Kl[1] = dgst[ 3]; - Kh[2] = dgst[ 4]; - Kl[2] = dgst[ 5]; - Kh[3] = dgst[ 6]; - Kl[3] = dgst[ 7]; - Kh[4] = dgst[ 8]; - Kl[4] = dgst[ 9]; - Kh[5] = dgst[10]; - Kl[5] = dgst[11]; - Kh[6] = dgst[12]; - Kl[6] = dgst[13]; - Kh[7] = dgst[14]; - Kl[7] = dgst[15]; - - u32 stateh[8]; - u32 statel[8]; - - stateh[0] = w[ 0] ^ Kh[0]; - statel[0] = w[ 1] ^ Kl[0]; - stateh[1] = w[ 2] ^ Kh[1]; - statel[1] = w[ 3] ^ Kl[1]; - stateh[2] = w[ 4] ^ Kh[2]; - statel[2] = w[ 5] ^ Kl[2]; - stateh[3] = w[ 6] ^ Kh[3]; - statel[3] = w[ 7] ^ Kl[3]; - stateh[4] = w[ 8] ^ Kh[4]; - statel[4] = w[ 9] ^ Kl[4]; - stateh[5] = w[10] ^ Kh[5]; - statel[5] = w[11] ^ Kl[5]; - stateh[6] = w[12] ^ Kh[6]; - statel[6] = w[13] ^ Kl[6]; - stateh[7] = w[14] ^ Kh[7]; - statel[7] = w[15] ^ Kl[7]; - - u32 r; - - for (r = 1; r <= R; r++) - { - u32 Lh[8]; - u32 Ll[8]; - - u32 i; - - #pragma unroll 8 - for (i = 0; i < 8; i++) - { - const u8 Lp0 = Kh[(i + 8) & 7] >> 24; - const u8 Lp1 = Kh[(i + 7) & 7] >> 16; - const u8 Lp2 = Kh[(i + 6) & 7] >> 8; - const u8 Lp3 = Kh[(i + 5) & 7] >> 0; - const u8 Lp4 = Kl[(i + 4) & 7] >> 24; - const u8 Lp5 = Kl[(i + 3) & 7] >> 16; - const u8 Lp6 = Kl[(i + 2) & 7] >> 8; - const u8 Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ 
BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (i = 0; i < 8; i++) - { - const u8 Lp0 = stateh[(i + 8) & 7] >> 24; - const u8 Lp1 = stateh[(i + 7) & 7] >> 16; - const u8 Lp2 = stateh[(i + 6) & 7] >> 8; - const u8 Lp3 = stateh[(i + 5) & 7] >> 0; - const u8 Lp4 = statel[(i + 4) & 7] >> 24; - const u8 Lp5 = statel[(i + 3) & 7] >> 16; - const u8 Lp6 = statel[(i + 2) & 7] >> 8; - const u8 Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] ^= stateh[0] ^ w[ 0]; - dgst[ 1] ^= statel[0] ^ w[ 1]; - dgst[ 2] ^= stateh[1] ^ w[ 2]; - dgst[ 3] ^= statel[1] ^ w[ 3]; - dgst[ 4] ^= stateh[2] ^ w[ 4]; - dgst[ 5] ^= statel[2] ^ w[ 5]; - dgst[ 6] ^= stateh[3] ^ w[ 6]; - dgst[ 7] 
^= statel[3] ^ w[ 7]; - dgst[ 8] ^= stateh[4] ^ w[ 8]; - dgst[ 9] ^= statel[4] ^ w[ 9]; - dgst[10] ^= stateh[5] ^ w[10]; - dgst[11] ^= statel[5] ^ w[11]; - dgst[12] ^= stateh[6] ^ w[12]; - dgst[13] ^= statel[6] ^ w[13]; - dgst[14] ^= stateh[7] ^ w[14]; - dgst[15] ^= statel[7] ^ w[15]; -} - -__device__ static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[16], const u32 opad[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - dgst[ 0] = ipad[ 0]; - dgst[ 1] = ipad[ 1]; - dgst[ 2] = ipad[ 2]; - dgst[ 3] = ipad[ 3]; - dgst[ 4] = ipad[ 4]; - dgst[ 5] = ipad[ 5]; - dgst[ 6] = ipad[ 6]; - dgst[ 7] = ipad[ 7]; - dgst[ 8] = ipad[ 8]; - dgst[ 9] = ipad[ 9]; - dgst[10] = ipad[10]; - dgst[11] = ipad[11]; - dgst[12] = ipad[12]; - dgst[13] = ipad[13]; - dgst[14] = ipad[14]; - dgst[15] = ipad[15]; - - whirlpool_transform (w1, dgst, s_Ch, s_Cl); - whirlpool_transform (w2, dgst, s_Ch, s_Cl); - - u32 w[16]; - - w[ 0] = dgst[ 0]; - w[ 1] = dgst[ 1]; - w[ 2] = dgst[ 2]; - w[ 3] = dgst[ 3]; - w[ 4] = dgst[ 4]; - w[ 5] = dgst[ 5]; - w[ 6] = dgst[ 6]; - w[ 7] = dgst[ 7]; - w[ 8] = dgst[ 8]; - w[ 9] = dgst[ 9]; - w[10] = dgst[10]; - w[11] = dgst[11]; - w[12] = dgst[12]; - w[13] = dgst[13]; - w[14] = dgst[14]; - w[15] = dgst[15]; - - dgst[ 0] = opad[ 0]; - dgst[ 1] = opad[ 1]; - dgst[ 2] = opad[ 2]; - dgst[ 3] = opad[ 3]; - dgst[ 4] = opad[ 4]; - dgst[ 5] = opad[ 5]; - dgst[ 6] = opad[ 6]; - dgst[ 7] = opad[ 7]; - dgst[ 8] = opad[ 8]; - dgst[ 9] = opad[ 9]; - dgst[10] = opad[10]; - dgst[11] = opad[11]; - dgst[12] = opad[12]; - dgst[13] = opad[13]; - dgst[14] = opad[14]; - dgst[15] = opad[15]; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); - - w[ 0] = 0x80000000; - w[ 1] = 0; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (64 + 64) * 8; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); -} - -__device__ static void hmac_init (u32 w[16], 
u32 ipad[16], u32 opad[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[ 0] = 0; - ipad[ 1] = 0; - ipad[ 2] = 0; - ipad[ 3] = 0; - ipad[ 4] = 0; - ipad[ 5] = 0; - ipad[ 6] = 0; - ipad[ 7] = 0; - ipad[ 8] = 0; - ipad[ 9] = 0; - ipad[10] = 0; - ipad[11] = 0; - ipad[12] = 0; - ipad[13] = 0; - ipad[14] = 0; - ipad[15] = 0; - - whirlpool_transform (w, ipad, s_Ch, s_Cl); - - w[ 0] ^= 0x6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[ 0] = 0; - opad[ 1] = 0; - opad[ 2] = 0; - opad[ 3] = 0; - opad[ 4] = 0; - opad[ 5] = 0; - opad[ 6] = 0; - opad[ 7] = 0; - opad[ 8] = 0; - opad[ 9] = 0; - opad[10] = 0; - opad[11] = 0; - opad[12] = 0; - opad[13] = 0; - opad[14] = 0; - opad[15] = 0; - - whirlpool_transform (w, opad, s_Ch, s_Cl); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m06232_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], 
esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * shared mem - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf1[16]; - - salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]); - salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]); - salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[ 6] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]); - salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]); - salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]); - salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]); - salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - - u32 salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80000000; - salt_buf2[ 2] = 0; - 
salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = 0; - salt_buf2[15] = (64 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32 w[16]; - - w[ 0] = swap_workaround (w0[0]); - w[ 1] = swap_workaround (w0[1]); - w[ 2] = swap_workaround (w0[2]); - w[ 3] = swap_workaround (w0[3]); - w[ 4] = swap_workaround (w1[0]); - w[ 5] = swap_workaround (w1[1]); - w[ 6] = swap_workaround (w1[2]); - w[ 7] = swap_workaround (w1[3]); - w[ 8] = swap_workaround (w2[0]); - w[ 9] = swap_workaround (w2[1]); - w[10] = swap_workaround (w2[2]); - w[11] = swap_workaround (w2[3]); - w[12] = swap_workaround (w3[0]); - w[13] = swap_workaround (w3[1]); - w[14] = swap_workaround (w3[2]); - w[15] = swap_workaround (w3[3]); - - u32 ipad[16]; - u32 opad[16]; - - hmac_init (w, ipad, opad, s_Ch, s_Cl); - - tmps[gid].ipad[ 0] = ipad[ 0]; - tmps[gid].ipad[ 1] = ipad[ 1]; - tmps[gid].ipad[ 2] = ipad[ 2]; - tmps[gid].ipad[ 3] = ipad[ 3]; - tmps[gid].ipad[ 4] = ipad[ 4]; - tmps[gid].ipad[ 5] = ipad[ 5]; - tmps[gid].ipad[ 6] = ipad[ 6]; - tmps[gid].ipad[ 7] = ipad[ 7]; - tmps[gid].ipad[ 8] = ipad[ 8]; - tmps[gid].ipad[ 9] = ipad[ 9]; - tmps[gid].ipad[10] = ipad[10]; - tmps[gid].ipad[11] = ipad[11]; - tmps[gid].ipad[12] = ipad[12]; - tmps[gid].ipad[13] = ipad[13]; - tmps[gid].ipad[14] = ipad[14]; - tmps[gid].ipad[15] = ipad[15]; - - tmps[gid].opad[ 0] = opad[ 0]; - tmps[gid].opad[ 1] = opad[ 1]; - tmps[gid].opad[ 2] = opad[ 2]; - tmps[gid].opad[ 3] = opad[ 3]; - tmps[gid].opad[ 4] = opad[ 4]; - tmps[gid].opad[ 5] = opad[ 5]; - tmps[gid].opad[ 6] = opad[ 6]; - tmps[gid].opad[ 7] = opad[ 7]; - tmps[gid].opad[ 8] = opad[ 8]; - tmps[gid].opad[ 9] = opad[ 9]; - tmps[gid].opad[10] = opad[10]; - tmps[gid].opad[11] = opad[11]; - tmps[gid].opad[12] = opad[12]; - tmps[gid].opad[13] = opad[13]; - 
tmps[gid].opad[14] = opad[14]; - tmps[gid].opad[15] = opad[15]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 16, j += 1) - { - salt_buf2[0] = j; - - u32 dgst[16]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst, s_Ch, s_Cl); - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = dgst[ 0]; - tmps[gid].out[i + 1] = dgst[ 1]; - tmps[gid].out[i + 2] = dgst[ 2]; - tmps[gid].out[i + 3] = dgst[ 3]; - tmps[gid].out[i + 4] = dgst[ 4]; - tmps[gid].out[i + 5] = dgst[ 5]; - tmps[gid].out[i + 6] = dgst[ 6]; - tmps[gid].out[i + 7] = dgst[ 7]; - tmps[gid].out[i + 8] = dgst[ 8]; - tmps[gid].out[i + 9] = dgst[ 9]; - tmps[gid].out[i + 10] = dgst[10]; - tmps[gid].out[i + 11] = dgst[11]; - tmps[gid].out[i + 12] = dgst[12]; - tmps[gid].out[i + 13] = dgst[13]; - tmps[gid].out[i + 14] = dgst[14]; - tmps[gid].out[i + 15] = dgst[15]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06232_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - u32 ipad[16]; - - ipad[ 0] = tmps[gid].ipad[ 0]; - ipad[ 1] = tmps[gid].ipad[ 1], - ipad[ 2] = tmps[gid].ipad[ 2]; - ipad[ 3] = tmps[gid].ipad[ 3]; - ipad[ 4] = tmps[gid].ipad[ 4]; - ipad[ 5] = tmps[gid].ipad[ 5]; - ipad[ 6] = tmps[gid].ipad[ 6], - ipad[ 7] = tmps[gid].ipad[ 7]; - ipad[ 8] = tmps[gid].ipad[ 8]; - ipad[ 9] = tmps[gid].ipad[ 9]; - ipad[10] = tmps[gid].ipad[10]; - ipad[11] = tmps[gid].ipad[11], - ipad[12] = tmps[gid].ipad[12]; - ipad[13] = tmps[gid].ipad[13]; - ipad[14] = tmps[gid].ipad[14]; - ipad[15] = tmps[gid].ipad[15]; - - u32 opad[16]; - - opad[ 0] = tmps[gid].opad[ 0]; - opad[ 1] = tmps[gid].opad[ 1], - opad[ 2] = tmps[gid].opad[ 2]; - opad[ 3] = tmps[gid].opad[ 3]; - opad[ 4] = tmps[gid].opad[ 4]; - opad[ 5] = tmps[gid].opad[ 5]; - opad[ 6] = tmps[gid].opad[ 6], - opad[ 7] = tmps[gid].opad[ 7]; - opad[ 8] = tmps[gid].opad[ 8]; - opad[ 9] = tmps[gid].opad[ 9]; - opad[10] = tmps[gid].opad[10]; - opad[11] = tmps[gid].opad[11], - opad[12] = tmps[gid].opad[12]; - opad[13] = tmps[gid].opad[13]; - opad[14] = tmps[gid].opad[14]; - opad[15] = tmps[gid].opad[15]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 16) - { - u32 dgst[16]; - - dgst[ 0] = tmps[gid].dgst[i + 0]; - dgst[ 1] = tmps[gid].dgst[i + 1]; - dgst[ 2] = tmps[gid].dgst[i + 2]; - dgst[ 3] = tmps[gid].dgst[i + 3]; - dgst[ 4] = tmps[gid].dgst[i + 4]; - dgst[ 5] = 
tmps[gid].dgst[i + 5]; - dgst[ 6] = tmps[gid].dgst[i + 6]; - dgst[ 7] = tmps[gid].dgst[i + 7]; - dgst[ 8] = tmps[gid].dgst[i + 8]; - dgst[ 9] = tmps[gid].dgst[i + 9]; - dgst[10] = tmps[gid].dgst[i + 10]; - dgst[11] = tmps[gid].dgst[i + 11]; - dgst[12] = tmps[gid].dgst[i + 12]; - dgst[13] = tmps[gid].dgst[i + 13]; - dgst[14] = tmps[gid].dgst[i + 14]; - dgst[15] = tmps[gid].dgst[i + 15]; - - u32 out[16]; - - out[ 0] = tmps[gid].out[i + 0]; - out[ 1] = tmps[gid].out[i + 1]; - out[ 2] = tmps[gid].out[i + 2]; - out[ 3] = tmps[gid].out[i + 3]; - out[ 4] = tmps[gid].out[i + 4]; - out[ 5] = tmps[gid].out[i + 5]; - out[ 6] = tmps[gid].out[i + 6]; - out[ 7] = tmps[gid].out[i + 7]; - out[ 8] = tmps[gid].out[i + 8]; - out[ 9] = tmps[gid].out[i + 9]; - out[10] = tmps[gid].out[i + 10]; - out[11] = tmps[gid].out[i + 11]; - out[12] = tmps[gid].out[i + 12]; - out[13] = tmps[gid].out[i + 13]; - out[14] = tmps[gid].out[i + 14]; - out[15] = tmps[gid].out[i + 15]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w1[16]; - - w1[ 0] = dgst[ 0]; - w1[ 1] = dgst[ 1]; - w1[ 2] = dgst[ 2]; - w1[ 3] = dgst[ 3]; - w1[ 4] = dgst[ 4]; - w1[ 5] = dgst[ 5]; - w1[ 6] = dgst[ 6]; - w1[ 7] = dgst[ 7]; - w1[ 8] = dgst[ 8]; - w1[ 9] = dgst[ 9]; - w1[10] = dgst[10]; - w1[11] = dgst[11]; - w1[12] = dgst[12]; - w1[13] = dgst[13]; - w1[14] = dgst[14]; - w1[15] = dgst[15]; - - u32 w2[16]; - - w2[ 0] = 0x80000000; - w2[ 1] = 0; - w2[ 2] = 0; - w2[ 3] = 0; - w2[ 4] = 0; - w2[ 5] = 0; - w2[ 6] = 0; - w2[ 7] = 0; - w2[ 8] = 0; - w2[ 9] = 0; - w2[10] = 0; - w2[11] = 0; - w2[12] = 0; - w2[13] = 0; - w2[14] = 0; - w2[15] = (64 + 64) * 8; - - hmac_run2 (w1, w2, ipad, opad, dgst, s_Ch, s_Cl); - - out[ 0] ^= dgst[ 0]; - out[ 1] ^= dgst[ 1]; - out[ 2] ^= dgst[ 2]; - out[ 3] ^= dgst[ 3]; - out[ 4] ^= dgst[ 4]; - out[ 5] ^= dgst[ 5]; - out[ 6] ^= dgst[ 6]; - out[ 7] ^= dgst[ 7]; - out[ 8] ^= dgst[ 8]; - out[ 9] ^= dgst[ 9]; - out[10] ^= dgst[10]; - out[11] ^= dgst[11]; - out[12] ^= dgst[12]; - out[13] ^= dgst[13]; - 
out[14] ^= dgst[14]; - out[15] ^= dgst[15]; - } - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = out[ 0]; - tmps[gid].out[i + 1] = out[ 1]; - tmps[gid].out[i + 2] = out[ 2]; - tmps[gid].out[i + 3] = out[ 3]; - tmps[gid].out[i + 4] = out[ 4]; - tmps[gid].out[i + 5] = out[ 5]; - tmps[gid].out[i + 6] = out[ 6]; - tmps[gid].out[i + 7] = out[ 7]; - tmps[gid].out[i + 8] = out[ 8]; - tmps[gid].out[i + 9] = out[ 9]; - tmps[gid].out[i + 10] = out[10]; - tmps[gid].out[i + 11] = out[11]; - tmps[gid].out[i + 12] = out[12]; - tmps[gid].out[i + 13] = out[13]; - tmps[gid].out[i + 14] = out[14]; - tmps[gid].out[i + 15] = out[15]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06232_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (tmps[gid].out[ 0]); - ukey1[1] = swap_workaround (tmps[gid].out[ 1]); - ukey1[2] = swap_workaround (tmps[gid].out[ 2]); - ukey1[3] = swap_workaround (tmps[gid].out[ 3]); - ukey1[4] = swap_workaround (tmps[gid].out[ 4]); - ukey1[5] = swap_workaround (tmps[gid].out[ 5]); - ukey1[6] = swap_workaround (tmps[gid].out[ 6]); - ukey1[7] = swap_workaround (tmps[gid].out[ 7]); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (tmps[gid].out[ 8]); - ukey2[1] = swap_workaround (tmps[gid].out[ 9]); - ukey2[2] = swap_workaround (tmps[gid].out[10]); - ukey2[3] = swap_workaround (tmps[gid].out[11]); - ukey2[4] = swap_workaround (tmps[gid].out[12]); - ukey2[5] = swap_workaround (tmps[gid].out[13]); - ukey2[6] = swap_workaround (tmps[gid].out[14]); - ukey2[7] = swap_workaround (tmps[gid].out[15]); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, 
tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = swap_workaround (tmps[gid].out[16]); - ukey3[1] = swap_workaround (tmps[gid].out[17]); - ukey3[2] = swap_workaround (tmps[gid].out[18]); - ukey3[3] = swap_workaround (tmps[gid].out[19]); - ukey3[4] = swap_workaround (tmps[gid].out[20]); - ukey3[5] = swap_workaround (tmps[gid].out[21]); - ukey3[6] = swap_workaround (tmps[gid].out[22]); - ukey3[7] = swap_workaround (tmps[gid].out[23]); - - u32 ukey4[8]; - - ukey4[0] = swap_workaround (tmps[gid].out[24]); - ukey4[1] = swap_workaround (tmps[gid].out[25]); - ukey4[2] = swap_workaround (tmps[gid].out[26]); - ukey4[3] = swap_workaround (tmps[gid].out[27]); - ukey4[4] = swap_workaround (tmps[gid].out[28]); - ukey4[5] = swap_workaround (tmps[gid].out[29]); - ukey4[6] = swap_workaround (tmps[gid].out[30]); - ukey4[7] = swap_workaround (tmps[gid].out[31]); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); 
- - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06233.cu b/nv/m06233.cu deleted file mode 100644 index 9fb3349..0000000 --- a/nv/m06233.cu +++ /dev/null @@ -1,2121 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _WHIRLPOOL_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#include "gpu_aes256_nv.c" -#include "gpu_twofish256_nv.c" -#include "gpu_serpent256_nv.c" - -#define R 10 - -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) - -__device__ __constant__ u32 Ch[8][256] = -{ - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 
0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 
0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 
0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 
0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 
0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 
0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 
0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 
0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 
0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 
0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 
0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 
0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 
0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - } -}; - -__device__ __constant__ u32 Cl[8][256] = -{ - { - 0xc07830d8, 0x05af4626, 0x7ef991b8, 0x136fcdfb, - 0x4ca113cb, 0xa9626d11, 0x08050209, 0x426e9e0d, - 0xadee6c9b, 0x590451ff, 0xdebdb90c, 0xfb06f70e, - 0xef80f296, 0x5fcede30, 0xfcef3f6d, 0xaa07a4f8, - 0x27fdc047, 0x89766535, 0xaccd2b37, 0x048c018a, - 0x71155bd2, 0x603c186c, 0xff8af684, 0xb5e16a80, - 0xe8693af5, 0x5347ddb3, 0xf6acb321, 0x5eed999c, - 0x6d965c43, 0x627a9629, 0xa321e15d, 0x8216aed5, - 0xa8412abd, 0x9fb6eee8, 0xa5eb6e92, 0x7b56d79e, - 0x8cd92313, 0xd317fd23, 0x6a7f9420, 0x9e95a944, - 0xfa25b0a2, 0x06ca8fcf, 0x558d527c, 0x5022145a, - 0xe14f7f50, 0x691a5dc9, 0x7fdad614, 0x5cab17d9, - 0x8173673c, 0xd234ba8f, 0x80502090, 0xf303f507, - 0x16c08bdd, 0xedc67cd3, 0x28110a2d, 0x1fe6ce78, - 0x7353d597, 0x25bb4e02, 0x32588273, 0x2c9d0ba7, - 0x510153f6, 0xcf94fab2, 0xdcfb3749, 0x8e9fad56, - 0x8b30eb70, 0x2371c1cd, 0xc791f8bb, 0x17e3cc71, - 0xa68ea77b, 0xb84b2eaf, 0x02468e45, 0x84dc211a, - 0x1ec589d4, 0x75995a58, 0x9179632e, 0x381b0e3f, - 0x012347ac, 0xea2fb4b0, 0x6cb51bef, 0x85ff66b6, - 0x3ff2c65c, 0x100a0412, 0x39384993, 0xafa8e2de, - 0x0ecf8dc6, 0xc87d32d1, 0x7270923b, 0x869aaf5f, - 0xc31df931, 0x4b48dba8, 0xe22ab6b9, 0x34920dbc, - 0xa4c8293e, 0x2dbe4c0b, 0x8dfa64bf, 0xe94a7d59, - 0x1b6acff2, 0x78331e77, 0xe6a6b733, 0x74ba1df4, - 0x997c6127, 0x26de87eb, 0xbde46889, 
0x7a759032, - 0xab24e354, 0xf78ff48d, 0xf4ea3d64, 0xc23ebe9d, - 0x1da0403d, 0x67d5d00f, 0xd07234ca, 0x192c41b7, - 0xc95e757d, 0x9a19a8ce, 0xece53b7f, 0x0daa442f, - 0x07e9c863, 0xdb12ff2a, 0xbfa2e6cc, 0x905a2482, - 0x3a5d807a, 0x40281048, 0x56e89b95, 0x337bc5df, - 0x9690ab4d, 0x611f5fc0, 0x1c830791, 0xf5c97ac8, - 0xccf1335b, 0x00000000, 0x36d483f9, 0x4587566e, - 0x97b3ece1, 0x64b019e6, 0xfea9b128, 0xd87736c3, - 0xc15b7774, 0x112943be, 0x77dfd41d, 0xba0da0ea, - 0x124c8a57, 0xcb18fb38, 0x9df060ad, 0x2b74c3c4, - 0xe5c37eda, 0x921caac7, 0x791059db, 0x0365c9e9, - 0x0fecca6a, 0xb9686903, 0x65935e4a, 0x4ee79d8e, - 0xbe81a160, 0xe06c38fc, 0xbb2ee746, 0x52649a1f, - 0xe4e03976, 0x8fbceafa, 0x301e0c36, 0x249809ae, - 0xf940794b, 0x6359d185, 0x70361c7e, 0xf8633ee7, - 0x37f7c455, 0xeea3b53a, 0x29324d81, 0xc4f43152, - 0x9b3aef62, 0x66f697a3, 0x35b14a10, 0xf220b2ab, - 0x54ae15d0, 0xb7a7e4c5, 0xd5dd72ec, 0x5a619816, - 0xca3bbc94, 0xe785f09f, 0xddd870e5, 0x14860598, - 0xc6b2bf17, 0x410b57e4, 0x434dd9a1, 0x2ff8c24e, - 0xf1457b42, 0x15a54234, 0x94d62508, 0xf0663cee, - 0x22528661, 0x76fc93b1, 0xb32be54f, 0x20140824, - 0xb208a2e3, 0xbcc72f25, 0x4fc4da22, 0x68391a65, - 0x8335e979, 0xb684a369, 0xd79bfca9, 0x3db44819, - 0xc5d776fe, 0x313d4b9a, 0x3ed181f0, 0x88552299, - 0x0c890383, 0x4a6b9c04, 0xd1517366, 0x0b60cbe0, - 0xfdcc78c1, 0x7cbf1ffd, 0xd4fe3540, 0xeb0cf31c, - 0xa1676f18, 0x985f268b, 0x7d9c5851, 0xd6b8bb05, - 0x6b5cd38c, 0x57cbdc39, 0x6ef395aa, 0x180f061b, - 0x8a13acdc, 0x1a49885e, 0xdf9efea0, 0x21374f88, - 0x4d825467, 0xb16d6b0a, 0x46e29f87, 0xa202a6f1, - 0xae8ba572, 0x58271653, 0x9cd32701, 0x47c1d82b, - 0x95f562a4, 0x87b9e8f3, 0xe309f115, 0x0a438c4c, - 0x092645a5, 0x3c970fb5, 0xa04428b4, 0x5b42dfba, - 0xb04e2ca6, 0xcdd274f7, 0x6fd0d206, 0x482d1241, - 0xa7ade0d7, 0xd954716f, 0xceb7bd1e, 0x3b7ec7d6, - 0x2edb85e2, 0x2a578468, 0xb4c22d2c, 0x490e55ed, - 0x5d885075, 0xda31b886, 0x933fed6b, 0x44a411c2, - }, - { - 0x18c07830, 0x2305af46, 0xc67ef991, 0xe8136fcd, - 0x874ca113, 0xb8a9626d, 
0x01080502, 0x4f426e9e, - 0x36adee6c, 0xa6590451, 0xd2debdb9, 0xf5fb06f7, - 0x79ef80f2, 0x6f5fcede, 0x91fcef3f, 0x52aa07a4, - 0x6027fdc0, 0xbc897665, 0x9baccd2b, 0x8e048c01, - 0xa371155b, 0x0c603c18, 0x7bff8af6, 0x35b5e16a, - 0x1de8693a, 0xe05347dd, 0xd7f6acb3, 0xc25eed99, - 0x2e6d965c, 0x4b627a96, 0xfea321e1, 0x578216ae, - 0x15a8412a, 0x779fb6ee, 0x37a5eb6e, 0xe57b56d7, - 0x9f8cd923, 0xf0d317fd, 0x4a6a7f94, 0xda9e95a9, - 0x58fa25b0, 0xc906ca8f, 0x29558d52, 0x0a502214, - 0xb1e14f7f, 0xa0691a5d, 0x6b7fdad6, 0x855cab17, - 0xbd817367, 0x5dd234ba, 0x10805020, 0xf4f303f5, - 0xcb16c08b, 0x3eedc67c, 0x0528110a, 0x671fe6ce, - 0xe47353d5, 0x2725bb4e, 0x41325882, 0x8b2c9d0b, - 0xa7510153, 0x7dcf94fa, 0x95dcfb37, 0xd88e9fad, - 0xfb8b30eb, 0xee2371c1, 0x7cc791f8, 0x6617e3cc, - 0xdda68ea7, 0x17b84b2e, 0x4702468e, 0x9e84dc21, - 0xca1ec589, 0x2d75995a, 0xbf917963, 0x07381b0e, - 0xad012347, 0x5aea2fb4, 0x836cb51b, 0x3385ff66, - 0x633ff2c6, 0x02100a04, 0xaa393849, 0x71afa8e2, - 0xc80ecf8d, 0x19c87d32, 0x49727092, 0xd9869aaf, - 0xf2c31df9, 0xe34b48db, 0x5be22ab6, 0x8834920d, - 0x9aa4c829, 0x262dbe4c, 0x328dfa64, 0xb0e94a7d, - 0xe91b6acf, 0x0f78331e, 0xd5e6a6b7, 0x8074ba1d, - 0xbe997c61, 0xcd26de87, 0x34bde468, 0x487a7590, - 0xffab24e3, 0x7af78ff4, 0x90f4ea3d, 0x5fc23ebe, - 0x201da040, 0x6867d5d0, 0x1ad07234, 0xae192c41, - 0xb4c95e75, 0x549a19a8, 0x93ece53b, 0x220daa44, - 0x6407e9c8, 0xf1db12ff, 0x73bfa2e6, 0x12905a24, - 0x403a5d80, 0x08402810, 0xc356e89b, 0xec337bc5, - 0xdb9690ab, 0xa1611f5f, 0x8d1c8307, 0x3df5c97a, - 0x97ccf133, 0x00000000, 0xcf36d483, 0x2b458756, - 0x7697b3ec, 0x8264b019, 0xd6fea9b1, 0x1bd87736, - 0xb5c15b77, 0xaf112943, 0x6a77dfd4, 0x50ba0da0, - 0x45124c8a, 0xf3cb18fb, 0x309df060, 0xef2b74c3, - 0x3fe5c37e, 0x55921caa, 0xa2791059, 0xea0365c9, - 0x650fecca, 0xbab96869, 0x2f65935e, 0xc04ee79d, - 0xdebe81a1, 0x1ce06c38, 0xfdbb2ee7, 0x4d52649a, - 0x92e4e039, 0x758fbcea, 0x06301e0c, 0x8a249809, - 0xb2f94079, 0xe66359d1, 0x0e70361c, 0x1ff8633e, - 0x6237f7c4, 0xd4eea3b5, 
0xa829324d, 0x96c4f431, - 0xf99b3aef, 0xc566f697, 0x2535b14a, 0x59f220b2, - 0x8454ae15, 0x72b7a7e4, 0x39d5dd72, 0x4c5a6198, - 0x5eca3bbc, 0x78e785f0, 0x38ddd870, 0x8c148605, - 0xd1c6b2bf, 0xa5410b57, 0xe2434dd9, 0x612ff8c2, - 0xb3f1457b, 0x2115a542, 0x9c94d625, 0x1ef0663c, - 0x43225286, 0xc776fc93, 0xfcb32be5, 0x04201408, - 0x51b208a2, 0x99bcc72f, 0x6d4fc4da, 0x0d68391a, - 0xfa8335e9, 0xdfb684a3, 0x7ed79bfc, 0x243db448, - 0x3bc5d776, 0xab313d4b, 0xce3ed181, 0x11885522, - 0x8f0c8903, 0x4e4a6b9c, 0xb7d15173, 0xeb0b60cb, - 0x3cfdcc78, 0x817cbf1f, 0x94d4fe35, 0xf7eb0cf3, - 0xb9a1676f, 0x13985f26, 0x2c7d9c58, 0xd3d6b8bb, - 0xe76b5cd3, 0x6e57cbdc, 0xc46ef395, 0x03180f06, - 0x568a13ac, 0x441a4988, 0x7fdf9efe, 0xa921374f, - 0x2a4d8254, 0xbbb16d6b, 0xc146e29f, 0x53a202a6, - 0xdcae8ba5, 0x0b582716, 0x9d9cd327, 0x6c47c1d8, - 0x3195f562, 0x7487b9e8, 0xf6e309f1, 0x460a438c, - 0xac092645, 0x893c970f, 0x14a04428, 0xe15b42df, - 0x16b04e2c, 0x3acdd274, 0x696fd0d2, 0x09482d12, - 0x70a7ade0, 0xb6d95471, 0xd0ceb7bd, 0xed3b7ec7, - 0xcc2edb85, 0x422a5784, 0x98b4c22d, 0xa4490e55, - 0x285d8850, 0x5cda31b8, 0xf8933fed, 0x8644a411, - }, - { - 0x6018c078, 0x8c2305af, 0x3fc67ef9, 0x87e8136f, - 0x26874ca1, 0xdab8a962, 0x04010805, 0x214f426e, - 0xd836adee, 0xa2a65904, 0x6fd2debd, 0xf3f5fb06, - 0xf979ef80, 0xa16f5fce, 0x7e91fcef, 0x5552aa07, - 0x9d6027fd, 0xcabc8976, 0x569baccd, 0x028e048c, - 0xb6a37115, 0x300c603c, 0xf17bff8a, 0xd435b5e1, - 0x741de869, 0xa7e05347, 0x7bd7f6ac, 0x2fc25eed, - 0xb82e6d96, 0x314b627a, 0xdffea321, 0x41578216, - 0x5415a841, 0xc1779fb6, 0xdc37a5eb, 0xb3e57b56, - 0x469f8cd9, 0xe7f0d317, 0x354a6a7f, 0x4fda9e95, - 0x7d58fa25, 0x03c906ca, 0xa429558d, 0x280a5022, - 0xfeb1e14f, 0xbaa0691a, 0xb16b7fda, 0x2e855cab, - 0xcebd8173, 0x695dd234, 0x40108050, 0xf7f4f303, - 0x0bcb16c0, 0xf83eedc6, 0x14052811, 0x81671fe6, - 0xb7e47353, 0x9c2725bb, 0x19413258, 0x168b2c9d, - 0xa6a75101, 0xe97dcf94, 0x6e95dcfb, 0x47d88e9f, - 0xcbfb8b30, 0x9fee2371, 0xed7cc791, 0x856617e3, - 0x53dda68e, 
0x5c17b84b, 0x01470246, 0x429e84dc, - 0x0fca1ec5, 0xb42d7599, 0xc6bf9179, 0x1c07381b, - 0x8ead0123, 0x755aea2f, 0x36836cb5, 0xcc3385ff, - 0x91633ff2, 0x0802100a, 0x92aa3938, 0xd971afa8, - 0x07c80ecf, 0x6419c87d, 0x39497270, 0x43d9869a, - 0xeff2c31d, 0xabe34b48, 0x715be22a, 0x1a883492, - 0x529aa4c8, 0x98262dbe, 0xc8328dfa, 0xfab0e94a, - 0x83e91b6a, 0x3c0f7833, 0x73d5e6a6, 0x3a8074ba, - 0xc2be997c, 0x13cd26de, 0xd034bde4, 0x3d487a75, - 0xdbffab24, 0xf57af78f, 0x7a90f4ea, 0x615fc23e, - 0x80201da0, 0xbd6867d5, 0x681ad072, 0x82ae192c, - 0xeab4c95e, 0x4d549a19, 0x7693ece5, 0x88220daa, - 0x8d6407e9, 0xe3f1db12, 0xd173bfa2, 0x4812905a, - 0x1d403a5d, 0x20084028, 0x2bc356e8, 0x97ec337b, - 0x4bdb9690, 0xbea1611f, 0x0e8d1c83, 0xf43df5c9, - 0x6697ccf1, 0x00000000, 0x1bcf36d4, 0xac2b4587, - 0xc57697b3, 0x328264b0, 0x7fd6fea9, 0x6c1bd877, - 0xeeb5c15b, 0x86af1129, 0xb56a77df, 0x5d50ba0d, - 0x0945124c, 0xebf3cb18, 0xc0309df0, 0x9bef2b74, - 0xfc3fe5c3, 0x4955921c, 0xb2a27910, 0x8fea0365, - 0x89650fec, 0xd2bab968, 0xbc2f6593, 0x27c04ee7, - 0x5fdebe81, 0x701ce06c, 0xd3fdbb2e, 0x294d5264, - 0x7292e4e0, 0xc9758fbc, 0x1806301e, 0x128a2498, - 0xf2b2f940, 0xbfe66359, 0x380e7036, 0x7c1ff863, - 0x956237f7, 0x77d4eea3, 0x9aa82932, 0x6296c4f4, - 0xc3f99b3a, 0x33c566f6, 0x942535b1, 0x7959f220, - 0x2a8454ae, 0xd572b7a7, 0xe439d5dd, 0x2d4c5a61, - 0x655eca3b, 0xfd78e785, 0xe038ddd8, 0x0a8c1486, - 0x63d1c6b2, 0xaea5410b, 0xafe2434d, 0x99612ff8, - 0xf6b3f145, 0x842115a5, 0x4a9c94d6, 0x781ef066, - 0x11432252, 0x3bc776fc, 0xd7fcb32b, 0x10042014, - 0x5951b208, 0x5e99bcc7, 0xa96d4fc4, 0x340d6839, - 0xcffa8335, 0x5bdfb684, 0xe57ed79b, 0x90243db4, - 0xec3bc5d7, 0x96ab313d, 0x1fce3ed1, 0x44118855, - 0x068f0c89, 0x254e4a6b, 0xe6b7d151, 0x8beb0b60, - 0xf03cfdcc, 0x3e817cbf, 0x6a94d4fe, 0xfbf7eb0c, - 0xdeb9a167, 0x4c13985f, 0xb02c7d9c, 0x6bd3d6b8, - 0xbbe76b5c, 0xa56e57cb, 0x37c46ef3, 0x0c03180f, - 0x45568a13, 0x0d441a49, 0xe17fdf9e, 0x9ea92137, - 0xa82a4d82, 0xd6bbb16d, 0x23c146e2, 0x5153a202, - 0x57dcae8b, 
0x2c0b5827, 0x4e9d9cd3, 0xad6c47c1, - 0xc43195f5, 0xcd7487b9, 0xfff6e309, 0x05460a43, - 0x8aac0926, 0x1e893c97, 0x5014a044, 0xa3e15b42, - 0x5816b04e, 0xe83acdd2, 0xb9696fd0, 0x2409482d, - 0xdd70a7ad, 0xe2b6d954, 0x67d0ceb7, 0x93ed3b7e, - 0x17cc2edb, 0x15422a57, 0x5a98b4c2, 0xaaa4490e, - 0xa0285d88, 0x6d5cda31, 0xc7f8933f, 0x228644a4, - }, - { - 0x186018c0, 0x238c2305, 0xc63fc67e, 0xe887e813, - 0x8726874c, 0xb8dab8a9, 0x01040108, 0x4f214f42, - 0x36d836ad, 0xa6a2a659, 0xd26fd2de, 0xf5f3f5fb, - 0x79f979ef, 0x6fa16f5f, 0x917e91fc, 0x525552aa, - 0x609d6027, 0xbccabc89, 0x9b569bac, 0x8e028e04, - 0xa3b6a371, 0x0c300c60, 0x7bf17bff, 0x35d435b5, - 0x1d741de8, 0xe0a7e053, 0xd77bd7f6, 0xc22fc25e, - 0x2eb82e6d, 0x4b314b62, 0xfedffea3, 0x57415782, - 0x155415a8, 0x77c1779f, 0x37dc37a5, 0xe5b3e57b, - 0x9f469f8c, 0xf0e7f0d3, 0x4a354a6a, 0xda4fda9e, - 0x587d58fa, 0xc903c906, 0x29a42955, 0x0a280a50, - 0xb1feb1e1, 0xa0baa069, 0x6bb16b7f, 0x852e855c, - 0xbdcebd81, 0x5d695dd2, 0x10401080, 0xf4f7f4f3, - 0xcb0bcb16, 0x3ef83eed, 0x05140528, 0x6781671f, - 0xe4b7e473, 0x279c2725, 0x41194132, 0x8b168b2c, - 0xa7a6a751, 0x7de97dcf, 0x956e95dc, 0xd847d88e, - 0xfbcbfb8b, 0xee9fee23, 0x7ced7cc7, 0x66856617, - 0xdd53dda6, 0x175c17b8, 0x47014702, 0x9e429e84, - 0xca0fca1e, 0x2db42d75, 0xbfc6bf91, 0x071c0738, - 0xad8ead01, 0x5a755aea, 0x8336836c, 0x33cc3385, - 0x6391633f, 0x02080210, 0xaa92aa39, 0x71d971af, - 0xc807c80e, 0x196419c8, 0x49394972, 0xd943d986, - 0xf2eff2c3, 0xe3abe34b, 0x5b715be2, 0x881a8834, - 0x9a529aa4, 0x2698262d, 0x32c8328d, 0xb0fab0e9, - 0xe983e91b, 0x0f3c0f78, 0xd573d5e6, 0x803a8074, - 0xbec2be99, 0xcd13cd26, 0x34d034bd, 0x483d487a, - 0xffdbffab, 0x7af57af7, 0x907a90f4, 0x5f615fc2, - 0x2080201d, 0x68bd6867, 0x1a681ad0, 0xae82ae19, - 0xb4eab4c9, 0x544d549a, 0x937693ec, 0x2288220d, - 0x648d6407, 0xf1e3f1db, 0x73d173bf, 0x12481290, - 0x401d403a, 0x08200840, 0xc32bc356, 0xec97ec33, - 0xdb4bdb96, 0xa1bea161, 0x8d0e8d1c, 0x3df43df5, - 0x976697cc, 0x00000000, 0xcf1bcf36, 0x2bac2b45, - 
0x76c57697, 0x82328264, 0xd67fd6fe, 0x1b6c1bd8, - 0xb5eeb5c1, 0xaf86af11, 0x6ab56a77, 0x505d50ba, - 0x45094512, 0xf3ebf3cb, 0x30c0309d, 0xef9bef2b, - 0x3ffc3fe5, 0x55495592, 0xa2b2a279, 0xea8fea03, - 0x6589650f, 0xbad2bab9, 0x2fbc2f65, 0xc027c04e, - 0xde5fdebe, 0x1c701ce0, 0xfdd3fdbb, 0x4d294d52, - 0x927292e4, 0x75c9758f, 0x06180630, 0x8a128a24, - 0xb2f2b2f9, 0xe6bfe663, 0x0e380e70, 0x1f7c1ff8, - 0x62956237, 0xd477d4ee, 0xa89aa829, 0x966296c4, - 0xf9c3f99b, 0xc533c566, 0x25942535, 0x597959f2, - 0x842a8454, 0x72d572b7, 0x39e439d5, 0x4c2d4c5a, - 0x5e655eca, 0x78fd78e7, 0x38e038dd, 0x8c0a8c14, - 0xd163d1c6, 0xa5aea541, 0xe2afe243, 0x6199612f, - 0xb3f6b3f1, 0x21842115, 0x9c4a9c94, 0x1e781ef0, - 0x43114322, 0xc73bc776, 0xfcd7fcb3, 0x04100420, - 0x515951b2, 0x995e99bc, 0x6da96d4f, 0x0d340d68, - 0xfacffa83, 0xdf5bdfb6, 0x7ee57ed7, 0x2490243d, - 0x3bec3bc5, 0xab96ab31, 0xce1fce3e, 0x11441188, - 0x8f068f0c, 0x4e254e4a, 0xb7e6b7d1, 0xeb8beb0b, - 0x3cf03cfd, 0x813e817c, 0x946a94d4, 0xf7fbf7eb, - 0xb9deb9a1, 0x134c1398, 0x2cb02c7d, 0xd36bd3d6, - 0xe7bbe76b, 0x6ea56e57, 0xc437c46e, 0x030c0318, - 0x5645568a, 0x440d441a, 0x7fe17fdf, 0xa99ea921, - 0x2aa82a4d, 0xbbd6bbb1, 0xc123c146, 0x535153a2, - 0xdc57dcae, 0x0b2c0b58, 0x9d4e9d9c, 0x6cad6c47, - 0x31c43195, 0x74cd7487, 0xf6fff6e3, 0x4605460a, - 0xac8aac09, 0x891e893c, 0x145014a0, 0xe1a3e15b, - 0x165816b0, 0x3ae83acd, 0x69b9696f, 0x09240948, - 0x70dd70a7, 0xb6e2b6d9, 0xd067d0ce, 0xed93ed3b, - 0xcc17cc2e, 0x4215422a, 0x985a98b4, 0xa4aaa449, - 0x28a0285d, 0x5c6d5cda, 0xf8c7f893, 0x86228644, - }, - { - 0x18186018, 0x23238c23, 0xc6c63fc6, 0xe8e887e8, - 0x87872687, 0xb8b8dab8, 0x01010401, 0x4f4f214f, - 0x3636d836, 0xa6a6a2a6, 0xd2d26fd2, 0xf5f5f3f5, - 0x7979f979, 0x6f6fa16f, 0x91917e91, 0x52525552, - 0x60609d60, 0xbcbccabc, 0x9b9b569b, 0x8e8e028e, - 0xa3a3b6a3, 0x0c0c300c, 0x7b7bf17b, 0x3535d435, - 0x1d1d741d, 0xe0e0a7e0, 0xd7d77bd7, 0xc2c22fc2, - 0x2e2eb82e, 0x4b4b314b, 0xfefedffe, 0x57574157, - 0x15155415, 0x7777c177, 0x3737dc37, 
0xe5e5b3e5, - 0x9f9f469f, 0xf0f0e7f0, 0x4a4a354a, 0xdada4fda, - 0x58587d58, 0xc9c903c9, 0x2929a429, 0x0a0a280a, - 0xb1b1feb1, 0xa0a0baa0, 0x6b6bb16b, 0x85852e85, - 0xbdbdcebd, 0x5d5d695d, 0x10104010, 0xf4f4f7f4, - 0xcbcb0bcb, 0x3e3ef83e, 0x05051405, 0x67678167, - 0xe4e4b7e4, 0x27279c27, 0x41411941, 0x8b8b168b, - 0xa7a7a6a7, 0x7d7de97d, 0x95956e95, 0xd8d847d8, - 0xfbfbcbfb, 0xeeee9fee, 0x7c7ced7c, 0x66668566, - 0xdddd53dd, 0x17175c17, 0x47470147, 0x9e9e429e, - 0xcaca0fca, 0x2d2db42d, 0xbfbfc6bf, 0x07071c07, - 0xadad8ead, 0x5a5a755a, 0x83833683, 0x3333cc33, - 0x63639163, 0x02020802, 0xaaaa92aa, 0x7171d971, - 0xc8c807c8, 0x19196419, 0x49493949, 0xd9d943d9, - 0xf2f2eff2, 0xe3e3abe3, 0x5b5b715b, 0x88881a88, - 0x9a9a529a, 0x26269826, 0x3232c832, 0xb0b0fab0, - 0xe9e983e9, 0x0f0f3c0f, 0xd5d573d5, 0x80803a80, - 0xbebec2be, 0xcdcd13cd, 0x3434d034, 0x48483d48, - 0xffffdbff, 0x7a7af57a, 0x90907a90, 0x5f5f615f, - 0x20208020, 0x6868bd68, 0x1a1a681a, 0xaeae82ae, - 0xb4b4eab4, 0x54544d54, 0x93937693, 0x22228822, - 0x64648d64, 0xf1f1e3f1, 0x7373d173, 0x12124812, - 0x40401d40, 0x08082008, 0xc3c32bc3, 0xecec97ec, - 0xdbdb4bdb, 0xa1a1bea1, 0x8d8d0e8d, 0x3d3df43d, - 0x97976697, 0x00000000, 0xcfcf1bcf, 0x2b2bac2b, - 0x7676c576, 0x82823282, 0xd6d67fd6, 0x1b1b6c1b, - 0xb5b5eeb5, 0xafaf86af, 0x6a6ab56a, 0x50505d50, - 0x45450945, 0xf3f3ebf3, 0x3030c030, 0xefef9bef, - 0x3f3ffc3f, 0x55554955, 0xa2a2b2a2, 0xeaea8fea, - 0x65658965, 0xbabad2ba, 0x2f2fbc2f, 0xc0c027c0, - 0xdede5fde, 0x1c1c701c, 0xfdfdd3fd, 0x4d4d294d, - 0x92927292, 0x7575c975, 0x06061806, 0x8a8a128a, - 0xb2b2f2b2, 0xe6e6bfe6, 0x0e0e380e, 0x1f1f7c1f, - 0x62629562, 0xd4d477d4, 0xa8a89aa8, 0x96966296, - 0xf9f9c3f9, 0xc5c533c5, 0x25259425, 0x59597959, - 0x84842a84, 0x7272d572, 0x3939e439, 0x4c4c2d4c, - 0x5e5e655e, 0x7878fd78, 0x3838e038, 0x8c8c0a8c, - 0xd1d163d1, 0xa5a5aea5, 0xe2e2afe2, 0x61619961, - 0xb3b3f6b3, 0x21218421, 0x9c9c4a9c, 0x1e1e781e, - 0x43431143, 0xc7c73bc7, 0xfcfcd7fc, 0x04041004, - 0x51515951, 0x99995e99, 0x6d6da96d, 
0x0d0d340d, - 0xfafacffa, 0xdfdf5bdf, 0x7e7ee57e, 0x24249024, - 0x3b3bec3b, 0xabab96ab, 0xcece1fce, 0x11114411, - 0x8f8f068f, 0x4e4e254e, 0xb7b7e6b7, 0xebeb8beb, - 0x3c3cf03c, 0x81813e81, 0x94946a94, 0xf7f7fbf7, - 0xb9b9deb9, 0x13134c13, 0x2c2cb02c, 0xd3d36bd3, - 0xe7e7bbe7, 0x6e6ea56e, 0xc4c437c4, 0x03030c03, - 0x56564556, 0x44440d44, 0x7f7fe17f, 0xa9a99ea9, - 0x2a2aa82a, 0xbbbbd6bb, 0xc1c123c1, 0x53535153, - 0xdcdc57dc, 0x0b0b2c0b, 0x9d9d4e9d, 0x6c6cad6c, - 0x3131c431, 0x7474cd74, 0xf6f6fff6, 0x46460546, - 0xacac8aac, 0x89891e89, 0x14145014, 0xe1e1a3e1, - 0x16165816, 0x3a3ae83a, 0x6969b969, 0x09092409, - 0x7070dd70, 0xb6b6e2b6, 0xd0d067d0, 0xeded93ed, - 0xcccc17cc, 0x42421542, 0x98985a98, 0xa4a4aaa4, - 0x2828a028, 0x5c5c6d5c, 0xf8f8c7f8, 0x86862286, - }, - { - 0xd8181860, 0x2623238c, 0xb8c6c63f, 0xfbe8e887, - 0xcb878726, 0x11b8b8da, 0x09010104, 0x0d4f4f21, - 0x9b3636d8, 0xffa6a6a2, 0x0cd2d26f, 0x0ef5f5f3, - 0x967979f9, 0x306f6fa1, 0x6d91917e, 0xf8525255, - 0x4760609d, 0x35bcbcca, 0x379b9b56, 0x8a8e8e02, - 0xd2a3a3b6, 0x6c0c0c30, 0x847b7bf1, 0x803535d4, - 0xf51d1d74, 0xb3e0e0a7, 0x21d7d77b, 0x9cc2c22f, - 0x432e2eb8, 0x294b4b31, 0x5dfefedf, 0xd5575741, - 0xbd151554, 0xe87777c1, 0x923737dc, 0x9ee5e5b3, - 0x139f9f46, 0x23f0f0e7, 0x204a4a35, 0x44dada4f, - 0xa258587d, 0xcfc9c903, 0x7c2929a4, 0x5a0a0a28, - 0x50b1b1fe, 0xc9a0a0ba, 0x146b6bb1, 0xd985852e, - 0x3cbdbdce, 0x8f5d5d69, 0x90101040, 0x07f4f4f7, - 0xddcbcb0b, 0xd33e3ef8, 0x2d050514, 0x78676781, - 0x97e4e4b7, 0x0227279c, 0x73414119, 0xa78b8b16, - 0xf6a7a7a6, 0xb27d7de9, 0x4995956e, 0x56d8d847, - 0x70fbfbcb, 0xcdeeee9f, 0xbb7c7ced, 0x71666685, - 0x7bdddd53, 0xaf17175c, 0x45474701, 0x1a9e9e42, - 0xd4caca0f, 0x582d2db4, 0x2ebfbfc6, 0x3f07071c, - 0xacadad8e, 0xb05a5a75, 0xef838336, 0xb63333cc, - 0x5c636391, 0x12020208, 0x93aaaa92, 0xde7171d9, - 0xc6c8c807, 0xd1191964, 0x3b494939, 0x5fd9d943, - 0x31f2f2ef, 0xa8e3e3ab, 0xb95b5b71, 0xbc88881a, - 0x3e9a9a52, 0x0b262698, 0xbf3232c8, 0x59b0b0fa, - 0xf2e9e983, 0x770f0f3c, 
0x33d5d573, 0xf480803a, - 0x27bebec2, 0xebcdcd13, 0x893434d0, 0x3248483d, - 0x54ffffdb, 0x8d7a7af5, 0x6490907a, 0x9d5f5f61, - 0x3d202080, 0x0f6868bd, 0xca1a1a68, 0xb7aeae82, - 0x7db4b4ea, 0xce54544d, 0x7f939376, 0x2f222288, - 0x6364648d, 0x2af1f1e3, 0xcc7373d1, 0x82121248, - 0x7a40401d, 0x48080820, 0x95c3c32b, 0xdfecec97, - 0x4ddbdb4b, 0xc0a1a1be, 0x918d8d0e, 0xc83d3df4, - 0x5b979766, 0x00000000, 0xf9cfcf1b, 0x6e2b2bac, - 0xe17676c5, 0xe6828232, 0x28d6d67f, 0xc31b1b6c, - 0x74b5b5ee, 0xbeafaf86, 0x1d6a6ab5, 0xea50505d, - 0x57454509, 0x38f3f3eb, 0xad3030c0, 0xc4efef9b, - 0xda3f3ffc, 0xc7555549, 0xdba2a2b2, 0xe9eaea8f, - 0x6a656589, 0x03babad2, 0x4a2f2fbc, 0x8ec0c027, - 0x60dede5f, 0xfc1c1c70, 0x46fdfdd3, 0x1f4d4d29, - 0x76929272, 0xfa7575c9, 0x36060618, 0xae8a8a12, - 0x4bb2b2f2, 0x85e6e6bf, 0x7e0e0e38, 0xe71f1f7c, - 0x55626295, 0x3ad4d477, 0x81a8a89a, 0x52969662, - 0x62f9f9c3, 0xa3c5c533, 0x10252594, 0xab595979, - 0xd084842a, 0xc57272d5, 0xec3939e4, 0x164c4c2d, - 0x945e5e65, 0x9f7878fd, 0xe53838e0, 0x988c8c0a, - 0x17d1d163, 0xe4a5a5ae, 0xa1e2e2af, 0x4e616199, - 0x42b3b3f6, 0x34212184, 0x089c9c4a, 0xee1e1e78, - 0x61434311, 0xb1c7c73b, 0x4ffcfcd7, 0x24040410, - 0xe3515159, 0x2599995e, 0x226d6da9, 0x650d0d34, - 0x79fafacf, 0x69dfdf5b, 0xa97e7ee5, 0x19242490, - 0xfe3b3bec, 0x9aabab96, 0xf0cece1f, 0x99111144, - 0x838f8f06, 0x044e4e25, 0x66b7b7e6, 0xe0ebeb8b, - 0xc13c3cf0, 0xfd81813e, 0x4094946a, 0x1cf7f7fb, - 0x18b9b9de, 0x8b13134c, 0x512c2cb0, 0x05d3d36b, - 0x8ce7e7bb, 0x396e6ea5, 0xaac4c437, 0x1b03030c, - 0xdc565645, 0x5e44440d, 0xa07f7fe1, 0x88a9a99e, - 0x672a2aa8, 0x0abbbbd6, 0x87c1c123, 0xf1535351, - 0x72dcdc57, 0x530b0b2c, 0x019d9d4e, 0x2b6c6cad, - 0xa43131c4, 0xf37474cd, 0x15f6f6ff, 0x4c464605, - 0xa5acac8a, 0xb589891e, 0xb4141450, 0xbae1e1a3, - 0xa6161658, 0xf73a3ae8, 0x066969b9, 0x41090924, - 0xd77070dd, 0x6fb6b6e2, 0x1ed0d067, 0xd6eded93, - 0xe2cccc17, 0x68424215, 0x2c98985a, 0xeda4a4aa, - 0x752828a0, 0x865c5c6d, 0x6bf8f8c7, 0xc2868622, - }, - { - 0x30d81818, 
0x46262323, 0x91b8c6c6, 0xcdfbe8e8, - 0x13cb8787, 0x6d11b8b8, 0x02090101, 0x9e0d4f4f, - 0x6c9b3636, 0x51ffa6a6, 0xb90cd2d2, 0xf70ef5f5, - 0xf2967979, 0xde306f6f, 0x3f6d9191, 0xa4f85252, - 0xc0476060, 0x6535bcbc, 0x2b379b9b, 0x018a8e8e, - 0x5bd2a3a3, 0x186c0c0c, 0xf6847b7b, 0x6a803535, - 0x3af51d1d, 0xddb3e0e0, 0xb321d7d7, 0x999cc2c2, - 0x5c432e2e, 0x96294b4b, 0xe15dfefe, 0xaed55757, - 0x2abd1515, 0xeee87777, 0x6e923737, 0xd79ee5e5, - 0x23139f9f, 0xfd23f0f0, 0x94204a4a, 0xa944dada, - 0xb0a25858, 0x8fcfc9c9, 0x527c2929, 0x145a0a0a, - 0x7f50b1b1, 0x5dc9a0a0, 0xd6146b6b, 0x17d98585, - 0x673cbdbd, 0xba8f5d5d, 0x20901010, 0xf507f4f4, - 0x8bddcbcb, 0x7cd33e3e, 0x0a2d0505, 0xce786767, - 0xd597e4e4, 0x4e022727, 0x82734141, 0x0ba78b8b, - 0x53f6a7a7, 0xfab27d7d, 0x37499595, 0xad56d8d8, - 0xeb70fbfb, 0xc1cdeeee, 0xf8bb7c7c, 0xcc716666, - 0xa77bdddd, 0x2eaf1717, 0x8e454747, 0x211a9e9e, - 0x89d4caca, 0x5a582d2d, 0x632ebfbf, 0x0e3f0707, - 0x47acadad, 0xb4b05a5a, 0x1bef8383, 0x66b63333, - 0xc65c6363, 0x04120202, 0x4993aaaa, 0xe2de7171, - 0x8dc6c8c8, 0x32d11919, 0x923b4949, 0xaf5fd9d9, - 0xf931f2f2, 0xdba8e3e3, 0xb6b95b5b, 0x0dbc8888, - 0x293e9a9a, 0x4c0b2626, 0x64bf3232, 0x7d59b0b0, - 0xcff2e9e9, 0x1e770f0f, 0xb733d5d5, 0x1df48080, - 0x6127bebe, 0x87ebcdcd, 0x68893434, 0x90324848, - 0xe354ffff, 0xf48d7a7a, 0x3d649090, 0xbe9d5f5f, - 0x403d2020, 0xd00f6868, 0x34ca1a1a, 0x41b7aeae, - 0x757db4b4, 0xa8ce5454, 0x3b7f9393, 0x442f2222, - 0xc8636464, 0xff2af1f1, 0xe6cc7373, 0x24821212, - 0x807a4040, 0x10480808, 0x9b95c3c3, 0xc5dfecec, - 0xab4ddbdb, 0x5fc0a1a1, 0x07918d8d, 0x7ac83d3d, - 0x335b9797, 0x00000000, 0x83f9cfcf, 0x566e2b2b, - 0xece17676, 0x19e68282, 0xb128d6d6, 0x36c31b1b, - 0x7774b5b5, 0x43beafaf, 0xd41d6a6a, 0xa0ea5050, - 0x8a574545, 0xfb38f3f3, 0x60ad3030, 0xc3c4efef, - 0x7eda3f3f, 0xaac75555, 0x59dba2a2, 0xc9e9eaea, - 0xca6a6565, 0x6903baba, 0x5e4a2f2f, 0x9d8ec0c0, - 0xa160dede, 0x38fc1c1c, 0xe746fdfd, 0x9a1f4d4d, - 0x39769292, 0xeafa7575, 0x0c360606, 0x09ae8a8a, - 0x794bb2b2, 
0xd185e6e6, 0x1c7e0e0e, 0x3ee71f1f, - 0xc4556262, 0xb53ad4d4, 0x4d81a8a8, 0x31529696, - 0xef62f9f9, 0x97a3c5c5, 0x4a102525, 0xb2ab5959, - 0x15d08484, 0xe4c57272, 0x72ec3939, 0x98164c4c, - 0xbc945e5e, 0xf09f7878, 0x70e53838, 0x05988c8c, - 0xbf17d1d1, 0x57e4a5a5, 0xd9a1e2e2, 0xc24e6161, - 0x7b42b3b3, 0x42342121, 0x25089c9c, 0x3cee1e1e, - 0x86614343, 0x93b1c7c7, 0xe54ffcfc, 0x08240404, - 0xa2e35151, 0x2f259999, 0xda226d6d, 0x1a650d0d, - 0xe979fafa, 0xa369dfdf, 0xfca97e7e, 0x48192424, - 0x76fe3b3b, 0x4b9aabab, 0x81f0cece, 0x22991111, - 0x03838f8f, 0x9c044e4e, 0x7366b7b7, 0xcbe0ebeb, - 0x78c13c3c, 0x1ffd8181, 0x35409494, 0xf31cf7f7, - 0x6f18b9b9, 0x268b1313, 0x58512c2c, 0xbb05d3d3, - 0xd38ce7e7, 0xdc396e6e, 0x95aac4c4, 0x061b0303, - 0xacdc5656, 0x885e4444, 0xfea07f7f, 0x4f88a9a9, - 0x54672a2a, 0x6b0abbbb, 0x9f87c1c1, 0xa6f15353, - 0xa572dcdc, 0x16530b0b, 0x27019d9d, 0xd82b6c6c, - 0x62a43131, 0xe8f37474, 0xf115f6f6, 0x8c4c4646, - 0x45a5acac, 0x0fb58989, 0x28b41414, 0xdfbae1e1, - 0x2ca61616, 0x74f73a3a, 0xd2066969, 0x12410909, - 0xe0d77070, 0x716fb6b6, 0xbd1ed0d0, 0xc7d6eded, - 0x85e2cccc, 0x84684242, 0x2d2c9898, 0x55eda4a4, - 0x50752828, 0xb8865c5c, 0xed6bf8f8, 0x11c28686, - }, - { - 0x7830d818, 0xaf462623, 0xf991b8c6, 0x6fcdfbe8, - 0xa113cb87, 0x626d11b8, 0x05020901, 0x6e9e0d4f, - 0xee6c9b36, 0x0451ffa6, 0xbdb90cd2, 0x06f70ef5, - 0x80f29679, 0xcede306f, 0xef3f6d91, 0x07a4f852, - 0xfdc04760, 0x766535bc, 0xcd2b379b, 0x8c018a8e, - 0x155bd2a3, 0x3c186c0c, 0x8af6847b, 0xe16a8035, - 0x693af51d, 0x47ddb3e0, 0xacb321d7, 0xed999cc2, - 0x965c432e, 0x7a96294b, 0x21e15dfe, 0x16aed557, - 0x412abd15, 0xb6eee877, 0xeb6e9237, 0x56d79ee5, - 0xd923139f, 0x17fd23f0, 0x7f94204a, 0x95a944da, - 0x25b0a258, 0xca8fcfc9, 0x8d527c29, 0x22145a0a, - 0x4f7f50b1, 0x1a5dc9a0, 0xdad6146b, 0xab17d985, - 0x73673cbd, 0x34ba8f5d, 0x50209010, 0x03f507f4, - 0xc08bddcb, 0xc67cd33e, 0x110a2d05, 0xe6ce7867, - 0x53d597e4, 0xbb4e0227, 0x58827341, 0x9d0ba78b, - 0x0153f6a7, 0x94fab27d, 0xfb374995, 0x9fad56d8, - 
0x30eb70fb, 0x71c1cdee, 0x91f8bb7c, 0xe3cc7166, - 0x8ea77bdd, 0x4b2eaf17, 0x468e4547, 0xdc211a9e, - 0xc589d4ca, 0x995a582d, 0x79632ebf, 0x1b0e3f07, - 0x2347acad, 0x2fb4b05a, 0xb51bef83, 0xff66b633, - 0xf2c65c63, 0x0a041202, 0x384993aa, 0xa8e2de71, - 0xcf8dc6c8, 0x7d32d119, 0x70923b49, 0x9aaf5fd9, - 0x1df931f2, 0x48dba8e3, 0x2ab6b95b, 0x920dbc88, - 0xc8293e9a, 0xbe4c0b26, 0xfa64bf32, 0x4a7d59b0, - 0x6acff2e9, 0x331e770f, 0xa6b733d5, 0xba1df480, - 0x7c6127be, 0xde87ebcd, 0xe4688934, 0x75903248, - 0x24e354ff, 0x8ff48d7a, 0xea3d6490, 0x3ebe9d5f, - 0xa0403d20, 0xd5d00f68, 0x7234ca1a, 0x2c41b7ae, - 0x5e757db4, 0x19a8ce54, 0xe53b7f93, 0xaa442f22, - 0xe9c86364, 0x12ff2af1, 0xa2e6cc73, 0x5a248212, - 0x5d807a40, 0x28104808, 0xe89b95c3, 0x7bc5dfec, - 0x90ab4ddb, 0x1f5fc0a1, 0x8307918d, 0xc97ac83d, - 0xf1335b97, 0x00000000, 0xd483f9cf, 0x87566e2b, - 0xb3ece176, 0xb019e682, 0xa9b128d6, 0x7736c31b, - 0x5b7774b5, 0x2943beaf, 0xdfd41d6a, 0x0da0ea50, - 0x4c8a5745, 0x18fb38f3, 0xf060ad30, 0x74c3c4ef, - 0xc37eda3f, 0x1caac755, 0x1059dba2, 0x65c9e9ea, - 0xecca6a65, 0x686903ba, 0x935e4a2f, 0xe79d8ec0, - 0x81a160de, 0x6c38fc1c, 0x2ee746fd, 0x649a1f4d, - 0xe0397692, 0xbceafa75, 0x1e0c3606, 0x9809ae8a, - 0x40794bb2, 0x59d185e6, 0x361c7e0e, 0x633ee71f, - 0xf7c45562, 0xa3b53ad4, 0x324d81a8, 0xf4315296, - 0x3aef62f9, 0xf697a3c5, 0xb14a1025, 0x20b2ab59, - 0xae15d084, 0xa7e4c572, 0xdd72ec39, 0x6198164c, - 0x3bbc945e, 0x85f09f78, 0xd870e538, 0x8605988c, - 0xb2bf17d1, 0x0b57e4a5, 0x4dd9a1e2, 0xf8c24e61, - 0x457b42b3, 0xa5423421, 0xd625089c, 0x663cee1e, - 0x52866143, 0xfc93b1c7, 0x2be54ffc, 0x14082404, - 0x08a2e351, 0xc72f2599, 0xc4da226d, 0x391a650d, - 0x35e979fa, 0x84a369df, 0x9bfca97e, 0xb4481924, - 0xd776fe3b, 0x3d4b9aab, 0xd181f0ce, 0x55229911, - 0x8903838f, 0x6b9c044e, 0x517366b7, 0x60cbe0eb, - 0xcc78c13c, 0xbf1ffd81, 0xfe354094, 0x0cf31cf7, - 0x676f18b9, 0x5f268b13, 0x9c58512c, 0xb8bb05d3, - 0x5cd38ce7, 0xcbdc396e, 0xf395aac4, 0x0f061b03, - 0x13acdc56, 0x49885e44, 0x9efea07f, 0x374f88a9, - 
0x8254672a, 0x6d6b0abb, 0xe29f87c1, 0x02a6f153, - 0x8ba572dc, 0x2716530b, 0xd327019d, 0xc1d82b6c, - 0xf562a431, 0xb9e8f374, 0x09f115f6, 0x438c4c46, - 0x2645a5ac, 0x970fb589, 0x4428b414, 0x42dfbae1, - 0x4e2ca616, 0xd274f73a, 0xd0d20669, 0x2d124109, - 0xade0d770, 0x54716fb6, 0xb7bd1ed0, 0x7ec7d6ed, - 0xdb85e2cc, 0x57846842, 0xc22d2c98, 0x0e55eda4, - 0x88507528, 0x31b8865c, 0x3fed6bf8, 0xa411c286, - }, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,n,i) (u32) ((S)[(n)][(i)]) -#endif - -__device__ __constant__ u32 rch[R + 1] = -{ - 0x00000000, - 0x1823c6e8, - 0x36a6d2f5, - 0x60bc9b8e, - 0x1de0d7c2, - 0x157737e5, - 0x58c9290a, - 0xbd5d10f4, - 0xe427418b, - 0xfbee7c66, - 0xca2dbf07, -}; - -__device__ __constant__ u32 rcl[R + 1] = -{ - 0x00000000, - 0x87b8014f, - 0x796f9152, - 0xa30c7b35, - 0x2e4bfe57, - 0x9ff04ada, - 0xb1a06b85, - 0xcb3e0567, - 0xa77d95d8, - 0xdd17479e, - 0xad5a8333, -}; - -typedef unsigned char uchar; - -__device__ static void whirlpool_transform (const u32 w[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - u32 Kh[8]; - u32 Kl[8]; - - Kh[0] = dgst[ 0]; - Kl[0] = dgst[ 1]; - Kh[1] = dgst[ 2]; - Kl[1] = dgst[ 3]; - Kh[2] = dgst[ 4]; - Kl[2] = dgst[ 5]; - Kh[3] = dgst[ 6]; - Kl[3] = dgst[ 7]; - Kh[4] = dgst[ 8]; - Kl[4] = dgst[ 9]; - Kh[5] = dgst[10]; - Kl[5] = dgst[11]; - Kh[6] = dgst[12]; - Kl[6] = dgst[13]; - Kh[7] = dgst[14]; - Kl[7] = dgst[15]; - - u32 stateh[8]; - u32 statel[8]; - - stateh[0] = w[ 0] ^ Kh[0]; - statel[0] = w[ 1] ^ Kl[0]; - stateh[1] = w[ 2] ^ Kh[1]; - statel[1] = w[ 3] ^ Kl[1]; - stateh[2] = w[ 4] ^ Kh[2]; - statel[2] = w[ 5] ^ Kl[2]; - stateh[3] = w[ 6] ^ Kh[3]; - statel[3] = w[ 7] ^ Kl[3]; - stateh[4] = w[ 8] ^ Kh[4]; - statel[4] = w[ 9] ^ Kl[4]; - stateh[5] = w[10] ^ Kh[5]; - statel[5] = w[11] ^ Kl[5]; - stateh[6] = w[12] ^ Kh[6]; - statel[6] = w[13] ^ Kl[6]; - stateh[7] = w[14] ^ Kh[7]; - statel[7] = w[15] ^ Kl[7]; - - u32 r; - - for (r = 1; r <= R; r++) - { - u32 Lh[8]; - u32 Ll[8]; - - u32 i; - - #pragma unroll 8 - for (i = 
0; i < 8; i++) - { - const u8 Lp0 = Kh[(i + 8) & 7] >> 24; - const u8 Lp1 = Kh[(i + 7) & 7] >> 16; - const u8 Lp2 = Kh[(i + 6) & 7] >> 8; - const u8 Lp3 = Kh[(i + 5) & 7] >> 0; - const u8 Lp4 = Kl[(i + 4) & 7] >> 24; - const u8 Lp5 = Kl[(i + 3) & 7] >> 16; - const u8 Lp6 = Kl[(i + 2) & 7] >> 8; - const u8 Lp7 = Kl[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ BOX (s_Cl, 7, Lp7 & 0xff); - } - - Kh[0] = Lh[0] ^ rch[r]; - Kl[0] = Ll[0] ^ rcl[r]; - Kh[1] = Lh[1]; - Kl[1] = Ll[1]; - Kh[2] = Lh[2]; - Kl[2] = Ll[2]; - Kh[3] = Lh[3]; - Kl[3] = Ll[3]; - Kh[4] = Lh[4]; - Kl[4] = Ll[4]; - Kh[5] = Lh[5]; - Kl[5] = Ll[5]; - Kh[6] = Lh[6]; - Kl[6] = Ll[6]; - Kh[7] = Lh[7]; - Kl[7] = Ll[7]; - - #pragma unroll 8 - for (i = 0; i < 8; i++) - { - const u8 Lp0 = stateh[(i + 8) & 7] >> 24; - const u8 Lp1 = stateh[(i + 7) & 7] >> 16; - const u8 Lp2 = stateh[(i + 6) & 7] >> 8; - const u8 Lp3 = stateh[(i + 5) & 7] >> 0; - const u8 Lp4 = statel[(i + 4) & 7] >> 24; - const u8 Lp5 = statel[(i + 3) & 7] >> 16; - const u8 Lp6 = statel[(i + 2) & 7] >> 8; - const u8 Lp7 = statel[(i + 1) & 7] >> 0; - - Lh[i] = BOX (s_Ch, 0, Lp0 & 0xff) - ^ BOX (s_Ch, 1, Lp1 & 0xff) - ^ BOX (s_Ch, 2, Lp2 & 0xff) - ^ BOX (s_Ch, 3, Lp3 & 0xff) - ^ BOX (s_Ch, 4, Lp4 & 0xff) - ^ BOX (s_Ch, 5, Lp5 & 0xff) - ^ BOX (s_Ch, 6, Lp6 & 0xff) - ^ BOX (s_Ch, 7, Lp7 & 0xff); - - Ll[i] = BOX (s_Cl, 0, Lp0 & 0xff) - ^ BOX (s_Cl, 1, Lp1 & 0xff) - ^ BOX (s_Cl, 2, Lp2 & 0xff) - ^ BOX (s_Cl, 3, Lp3 & 0xff) - ^ BOX (s_Cl, 4, Lp4 & 0xff) - ^ BOX (s_Cl, 5, Lp5 & 0xff) - ^ BOX (s_Cl, 6, Lp6 & 0xff) - ^ 
BOX (s_Cl, 7, Lp7 & 0xff); - } - - stateh[0] = Lh[0] ^ Kh[0]; - statel[0] = Ll[0] ^ Kl[0]; - stateh[1] = Lh[1] ^ Kh[1]; - statel[1] = Ll[1] ^ Kl[1]; - stateh[2] = Lh[2] ^ Kh[2]; - statel[2] = Ll[2] ^ Kl[2]; - stateh[3] = Lh[3] ^ Kh[3]; - statel[3] = Ll[3] ^ Kl[3]; - stateh[4] = Lh[4] ^ Kh[4]; - statel[4] = Ll[4] ^ Kl[4]; - stateh[5] = Lh[5] ^ Kh[5]; - statel[5] = Ll[5] ^ Kl[5]; - stateh[6] = Lh[6] ^ Kh[6]; - statel[6] = Ll[6] ^ Kl[6]; - stateh[7] = Lh[7] ^ Kh[7]; - statel[7] = Ll[7] ^ Kl[7]; - } - - dgst[ 0] ^= stateh[0] ^ w[ 0]; - dgst[ 1] ^= statel[0] ^ w[ 1]; - dgst[ 2] ^= stateh[1] ^ w[ 2]; - dgst[ 3] ^= statel[1] ^ w[ 3]; - dgst[ 4] ^= stateh[2] ^ w[ 4]; - dgst[ 5] ^= statel[2] ^ w[ 5]; - dgst[ 6] ^= stateh[3] ^ w[ 6]; - dgst[ 7] ^= statel[3] ^ w[ 7]; - dgst[ 8] ^= stateh[4] ^ w[ 8]; - dgst[ 9] ^= statel[4] ^ w[ 9]; - dgst[10] ^= stateh[5] ^ w[10]; - dgst[11] ^= statel[5] ^ w[11]; - dgst[12] ^= stateh[6] ^ w[12]; - dgst[13] ^= statel[6] ^ w[13]; - dgst[14] ^= stateh[7] ^ w[14]; - dgst[15] ^= statel[7] ^ w[15]; -} - -__device__ static void hmac_run2 (const u32 w1[16], const u32 w2[16], const u32 ipad[16], const u32 opad[16], u32 dgst[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - dgst[ 0] = ipad[ 0]; - dgst[ 1] = ipad[ 1]; - dgst[ 2] = ipad[ 2]; - dgst[ 3] = ipad[ 3]; - dgst[ 4] = ipad[ 4]; - dgst[ 5] = ipad[ 5]; - dgst[ 6] = ipad[ 6]; - dgst[ 7] = ipad[ 7]; - dgst[ 8] = ipad[ 8]; - dgst[ 9] = ipad[ 9]; - dgst[10] = ipad[10]; - dgst[11] = ipad[11]; - dgst[12] = ipad[12]; - dgst[13] = ipad[13]; - dgst[14] = ipad[14]; - dgst[15] = ipad[15]; - - whirlpool_transform (w1, dgst, s_Ch, s_Cl); - whirlpool_transform (w2, dgst, s_Ch, s_Cl); - - u32 w[16]; - - w[ 0] = dgst[ 0]; - w[ 1] = dgst[ 1]; - w[ 2] = dgst[ 2]; - w[ 3] = dgst[ 3]; - w[ 4] = dgst[ 4]; - w[ 5] = dgst[ 5]; - w[ 6] = dgst[ 6]; - w[ 7] = dgst[ 7]; - w[ 8] = dgst[ 8]; - w[ 9] = dgst[ 9]; - w[10] = dgst[10]; - w[11] = dgst[11]; - w[12] = dgst[12]; - w[13] = dgst[13]; - w[14] = dgst[14]; - w[15] = dgst[15]; 
- - dgst[ 0] = opad[ 0]; - dgst[ 1] = opad[ 1]; - dgst[ 2] = opad[ 2]; - dgst[ 3] = opad[ 3]; - dgst[ 4] = opad[ 4]; - dgst[ 5] = opad[ 5]; - dgst[ 6] = opad[ 6]; - dgst[ 7] = opad[ 7]; - dgst[ 8] = opad[ 8]; - dgst[ 9] = opad[ 9]; - dgst[10] = opad[10]; - dgst[11] = opad[11]; - dgst[12] = opad[12]; - dgst[13] = opad[13]; - dgst[14] = opad[14]; - dgst[15] = opad[15]; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); - - w[ 0] = 0x80000000; - w[ 1] = 0; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (64 + 64) * 8; - - whirlpool_transform (w, dgst, s_Ch, s_Cl); -} - -__device__ static void hmac_init (u32 w[16], u32 ipad[16], u32 opad[16], u32 s_Ch[8][256], u32 s_Cl[8][256]) -{ - w[ 0] ^= 0x36363636; - w[ 1] ^= 0x36363636; - w[ 2] ^= 0x36363636; - w[ 3] ^= 0x36363636; - w[ 4] ^= 0x36363636; - w[ 5] ^= 0x36363636; - w[ 6] ^= 0x36363636; - w[ 7] ^= 0x36363636; - w[ 8] ^= 0x36363636; - w[ 9] ^= 0x36363636; - w[10] ^= 0x36363636; - w[11] ^= 0x36363636; - w[12] ^= 0x36363636; - w[13] ^= 0x36363636; - w[14] ^= 0x36363636; - w[15] ^= 0x36363636; - - ipad[ 0] = 0; - ipad[ 1] = 0; - ipad[ 2] = 0; - ipad[ 3] = 0; - ipad[ 4] = 0; - ipad[ 5] = 0; - ipad[ 6] = 0; - ipad[ 7] = 0; - ipad[ 8] = 0; - ipad[ 9] = 0; - ipad[10] = 0; - ipad[11] = 0; - ipad[12] = 0; - ipad[13] = 0; - ipad[14] = 0; - ipad[15] = 0; - - whirlpool_transform (w, ipad, s_Ch, s_Cl); - - w[ 0] ^= 0x6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a; - w[10] ^= 0x6a6a6a6a; - w[11] ^= 0x6a6a6a6a; - w[12] ^= 0x6a6a6a6a; - w[13] ^= 0x6a6a6a6a; - w[14] ^= 0x6a6a6a6a; - w[15] ^= 0x6a6a6a6a; - - opad[ 0] = 0; - opad[ 1] = 0; - opad[ 2] = 0; - opad[ 3] = 0; - opad[ 4] = 0; - opad[ 5] = 0; - opad[ 6] = 0; - opad[ 7] = 0; - opad[ 8] = 
0; - opad[ 9] = 0; - opad[10] = 0; - opad[11] = 0; - opad[12] = 0; - opad[13] = 0; - opad[14] = 0; - opad[15] = 0; - - whirlpool_transform (w, opad, s_Ch, s_Cl); -} - -__device__ static u32 u8add (const u32 a, const u32 b) -{ - const u32 a1 = (a >> 0) & 0xff; - const u32 a2 = (a >> 8) & 0xff; - const u32 a3 = (a >> 16) & 0xff; - const u32 a4 = (a >> 24) & 0xff; - - const u32 b1 = (b >> 0) & 0xff; - const u32 b2 = (b >> 8) & 0xff; - const u32 b3 = (b >> 16) & 0xff; - const u32 b4 = (b >> 24) & 0xff; - - const u32 r1 = (a1 + b1) & 0xff; - const u32 r2 = (a2 + b2) & 0xff; - const u32 r3 = (a3 + b3) & 0xff; - const u32 r4 = (a4 + b4) & 0xff; - - const u32 r = r1 << 0 - | r2 << 8 - | r3 << 16 - | r4 << 24; - - return r; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06233_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = 
pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * keyfile - */ - - w0[0] = u8add (w0[0], esalt_bufs[salt_pos].keyfile_buf[ 0]); - w0[1] = u8add (w0[1], esalt_bufs[salt_pos].keyfile_buf[ 1]); - w0[2] = u8add (w0[2], esalt_bufs[salt_pos].keyfile_buf[ 2]); - w0[3] = u8add (w0[3], esalt_bufs[salt_pos].keyfile_buf[ 3]); - w1[0] = u8add (w1[0], esalt_bufs[salt_pos].keyfile_buf[ 4]); - w1[1] = u8add (w1[1], esalt_bufs[salt_pos].keyfile_buf[ 5]); - w1[2] = u8add (w1[2], esalt_bufs[salt_pos].keyfile_buf[ 6]); - w1[3] = u8add (w1[3], esalt_bufs[salt_pos].keyfile_buf[ 7]); - w2[0] = u8add (w2[0], esalt_bufs[salt_pos].keyfile_buf[ 8]); - w2[1] = u8add (w2[1], esalt_bufs[salt_pos].keyfile_buf[ 9]); - w2[2] = u8add (w2[2], esalt_bufs[salt_pos].keyfile_buf[10]); - w2[3] = u8add (w2[3], esalt_bufs[salt_pos].keyfile_buf[11]); - w3[0] = u8add (w3[0], esalt_bufs[salt_pos].keyfile_buf[12]); - w3[1] = u8add (w3[1], esalt_bufs[salt_pos].keyfile_buf[13]); - w3[2] = u8add (w3[2], esalt_bufs[salt_pos].keyfile_buf[14]); - w3[3] = u8add (w3[3], esalt_bufs[salt_pos].keyfile_buf[15]); - - /** - * shared mem - */ - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf1[16]; - - salt_buf1[ 0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]); - salt_buf1[ 1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - salt_buf1[ 2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]); - salt_buf1[ 3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - salt_buf1[ 4] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[ 5] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[ 6] = swap_workaround 
(esalt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[ 7] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - salt_buf1[ 8] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]); - salt_buf1[ 9] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - salt_buf1[10] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]); - salt_buf1[11] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - salt_buf1[12] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]); - salt_buf1[13] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - salt_buf1[14] = swap_workaround (esalt_bufs[salt_pos].salt_buf[14]); - salt_buf1[15] = swap_workaround (esalt_bufs[salt_pos].salt_buf[15]); - - u32 salt_buf2[16]; - - salt_buf2[ 0] = 0; - salt_buf2[ 1] = 0x80000000; - salt_buf2[ 2] = 0; - salt_buf2[ 3] = 0; - salt_buf2[ 4] = 0; - salt_buf2[ 5] = 0; - salt_buf2[ 6] = 0; - salt_buf2[ 7] = 0; - salt_buf2[ 8] = 0; - salt_buf2[ 9] = 0; - salt_buf2[10] = 0; - salt_buf2[11] = 0; - salt_buf2[12] = 0; - salt_buf2[13] = 0; - salt_buf2[14] = 0; - salt_buf2[15] = (64 + 64 + 4) * 8; - - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - u32 w[16]; - - w[ 0] = swap_workaround (w0[0]); - w[ 1] = swap_workaround (w0[1]); - w[ 2] = swap_workaround (w0[2]); - w[ 3] = swap_workaround (w0[3]); - w[ 4] = swap_workaround (w1[0]); - w[ 5] = swap_workaround (w1[1]); - w[ 6] = swap_workaround (w1[2]); - w[ 7] = swap_workaround (w1[3]); - w[ 8] = swap_workaround (w2[0]); - w[ 9] = swap_workaround (w2[1]); - w[10] = swap_workaround (w2[2]); - w[11] = swap_workaround (w2[3]); - w[12] = swap_workaround (w3[0]); - w[13] = swap_workaround (w3[1]); - w[14] = swap_workaround (w3[2]); - w[15] = swap_workaround (w3[3]); - - u32 ipad[16]; - u32 opad[16]; - - hmac_init (w, ipad, opad, s_Ch, s_Cl); - - tmps[gid].ipad[ 0] = ipad[ 0]; - tmps[gid].ipad[ 1] = ipad[ 1]; - tmps[gid].ipad[ 2] = ipad[ 2]; - tmps[gid].ipad[ 3] = ipad[ 3]; - tmps[gid].ipad[ 4] = ipad[ 4]; - tmps[gid].ipad[ 5] = ipad[ 5]; - tmps[gid].ipad[ 6] 
= ipad[ 6]; - tmps[gid].ipad[ 7] = ipad[ 7]; - tmps[gid].ipad[ 8] = ipad[ 8]; - tmps[gid].ipad[ 9] = ipad[ 9]; - tmps[gid].ipad[10] = ipad[10]; - tmps[gid].ipad[11] = ipad[11]; - tmps[gid].ipad[12] = ipad[12]; - tmps[gid].ipad[13] = ipad[13]; - tmps[gid].ipad[14] = ipad[14]; - tmps[gid].ipad[15] = ipad[15]; - - tmps[gid].opad[ 0] = opad[ 0]; - tmps[gid].opad[ 1] = opad[ 1]; - tmps[gid].opad[ 2] = opad[ 2]; - tmps[gid].opad[ 3] = opad[ 3]; - tmps[gid].opad[ 4] = opad[ 4]; - tmps[gid].opad[ 5] = opad[ 5]; - tmps[gid].opad[ 6] = opad[ 6]; - tmps[gid].opad[ 7] = opad[ 7]; - tmps[gid].opad[ 8] = opad[ 8]; - tmps[gid].opad[ 9] = opad[ 9]; - tmps[gid].opad[10] = opad[10]; - tmps[gid].opad[11] = opad[11]; - tmps[gid].opad[12] = opad[12]; - tmps[gid].opad[13] = opad[13]; - tmps[gid].opad[14] = opad[14]; - tmps[gid].opad[15] = opad[15]; - - for (u32 i = 0, j = 1; i < (truecrypt_mdlen / 8 / 4); i += 16, j += 1) - { - salt_buf2[0] = j; - - u32 dgst[16]; - - hmac_run2 (salt_buf1, salt_buf2, ipad, opad, dgst, s_Ch, s_Cl); - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = dgst[ 0]; - tmps[gid].out[i + 1] = dgst[ 1]; - tmps[gid].out[i + 2] = dgst[ 2]; - tmps[gid].out[i + 3] = dgst[ 3]; - tmps[gid].out[i + 4] = dgst[ 4]; - tmps[gid].out[i + 5] = dgst[ 5]; - tmps[gid].out[i + 6] = dgst[ 6]; - tmps[gid].out[i + 7] = dgst[ 7]; - tmps[gid].out[i + 8] = dgst[ 8]; - tmps[gid].out[i + 9] = dgst[ 9]; - tmps[gid].out[i + 10] = dgst[10]; - 
tmps[gid].out[i + 11] = dgst[11]; - tmps[gid].out[i + 12] = dgst[12]; - tmps[gid].out[i + 13] = dgst[13]; - tmps[gid].out[i + 14] = dgst[14]; - tmps[gid].out[i + 15] = dgst[15]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06233_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 truecrypt_mdlen = salt_bufs[0].truecrypt_mdlen; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - __shared__ u32 s_Ch[8][256]; - __shared__ u32 s_Cl[8][256]; - - const u32 lid = threadIdx.x; - - #pragma unroll 8 - for (u32 i = 0; i < 8; i++) - { - s_Ch[i][lid] = Ch[i][lid]; - s_Cl[i][lid] = Cl[i][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - u32 ipad[16]; - - ipad[ 0] = tmps[gid].ipad[ 0]; - ipad[ 1] = tmps[gid].ipad[ 1], - ipad[ 2] = tmps[gid].ipad[ 2]; - ipad[ 3] = tmps[gid].ipad[ 3]; - ipad[ 4] = tmps[gid].ipad[ 4]; - ipad[ 5] = tmps[gid].ipad[ 5]; - ipad[ 6] = tmps[gid].ipad[ 6], - ipad[ 7] = tmps[gid].ipad[ 7]; - ipad[ 8] = tmps[gid].ipad[ 8]; - ipad[ 9] = tmps[gid].ipad[ 9]; - ipad[10] = tmps[gid].ipad[10]; - ipad[11] = tmps[gid].ipad[11], - ipad[12] = tmps[gid].ipad[12]; - ipad[13] = tmps[gid].ipad[13]; - ipad[14] = tmps[gid].ipad[14]; - ipad[15] = tmps[gid].ipad[15]; - - u32 opad[16]; - - opad[ 0] = tmps[gid].opad[ 
0]; - opad[ 1] = tmps[gid].opad[ 1], - opad[ 2] = tmps[gid].opad[ 2]; - opad[ 3] = tmps[gid].opad[ 3]; - opad[ 4] = tmps[gid].opad[ 4]; - opad[ 5] = tmps[gid].opad[ 5]; - opad[ 6] = tmps[gid].opad[ 6], - opad[ 7] = tmps[gid].opad[ 7]; - opad[ 8] = tmps[gid].opad[ 8]; - opad[ 9] = tmps[gid].opad[ 9]; - opad[10] = tmps[gid].opad[10]; - opad[11] = tmps[gid].opad[11], - opad[12] = tmps[gid].opad[12]; - opad[13] = tmps[gid].opad[13]; - opad[14] = tmps[gid].opad[14]; - opad[15] = tmps[gid].opad[15]; - - for (u32 i = 0; i < (truecrypt_mdlen / 8 / 4); i += 16) - { - u32 dgst[16]; - - dgst[ 0] = tmps[gid].dgst[i + 0]; - dgst[ 1] = tmps[gid].dgst[i + 1]; - dgst[ 2] = tmps[gid].dgst[i + 2]; - dgst[ 3] = tmps[gid].dgst[i + 3]; - dgst[ 4] = tmps[gid].dgst[i + 4]; - dgst[ 5] = tmps[gid].dgst[i + 5]; - dgst[ 6] = tmps[gid].dgst[i + 6]; - dgst[ 7] = tmps[gid].dgst[i + 7]; - dgst[ 8] = tmps[gid].dgst[i + 8]; - dgst[ 9] = tmps[gid].dgst[i + 9]; - dgst[10] = tmps[gid].dgst[i + 10]; - dgst[11] = tmps[gid].dgst[i + 11]; - dgst[12] = tmps[gid].dgst[i + 12]; - dgst[13] = tmps[gid].dgst[i + 13]; - dgst[14] = tmps[gid].dgst[i + 14]; - dgst[15] = tmps[gid].dgst[i + 15]; - - u32 out[16]; - - out[ 0] = tmps[gid].out[i + 0]; - out[ 1] = tmps[gid].out[i + 1]; - out[ 2] = tmps[gid].out[i + 2]; - out[ 3] = tmps[gid].out[i + 3]; - out[ 4] = tmps[gid].out[i + 4]; - out[ 5] = tmps[gid].out[i + 5]; - out[ 6] = tmps[gid].out[i + 6]; - out[ 7] = tmps[gid].out[i + 7]; - out[ 8] = tmps[gid].out[i + 8]; - out[ 9] = tmps[gid].out[i + 9]; - out[10] = tmps[gid].out[i + 10]; - out[11] = tmps[gid].out[i + 11]; - out[12] = tmps[gid].out[i + 12]; - out[13] = tmps[gid].out[i + 13]; - out[14] = tmps[gid].out[i + 14]; - out[15] = tmps[gid].out[i + 15]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w1[16]; - - w1[ 0] = dgst[ 0]; - w1[ 1] = dgst[ 1]; - w1[ 2] = dgst[ 2]; - w1[ 3] = dgst[ 3]; - w1[ 4] = dgst[ 4]; - w1[ 5] = dgst[ 5]; - w1[ 6] = dgst[ 6]; - w1[ 7] = dgst[ 7]; - w1[ 8] = dgst[ 8]; - w1[ 9] = dgst[ 
9]; - w1[10] = dgst[10]; - w1[11] = dgst[11]; - w1[12] = dgst[12]; - w1[13] = dgst[13]; - w1[14] = dgst[14]; - w1[15] = dgst[15]; - - u32 w2[16]; - - w2[ 0] = 0x80000000; - w2[ 1] = 0; - w2[ 2] = 0; - w2[ 3] = 0; - w2[ 4] = 0; - w2[ 5] = 0; - w2[ 6] = 0; - w2[ 7] = 0; - w2[ 8] = 0; - w2[ 9] = 0; - w2[10] = 0; - w2[11] = 0; - w2[12] = 0; - w2[13] = 0; - w2[14] = 0; - w2[15] = (64 + 64) * 8; - - hmac_run2 (w1, w2, ipad, opad, dgst, s_Ch, s_Cl); - - out[ 0] ^= dgst[ 0]; - out[ 1] ^= dgst[ 1]; - out[ 2] ^= dgst[ 2]; - out[ 3] ^= dgst[ 3]; - out[ 4] ^= dgst[ 4]; - out[ 5] ^= dgst[ 5]; - out[ 6] ^= dgst[ 6]; - out[ 7] ^= dgst[ 7]; - out[ 8] ^= dgst[ 8]; - out[ 9] ^= dgst[ 9]; - out[10] ^= dgst[10]; - out[11] ^= dgst[11]; - out[12] ^= dgst[12]; - out[13] ^= dgst[13]; - out[14] ^= dgst[14]; - out[15] ^= dgst[15]; - } - - tmps[gid].dgst[i + 0] = dgst[ 0]; - tmps[gid].dgst[i + 1] = dgst[ 1]; - tmps[gid].dgst[i + 2] = dgst[ 2]; - tmps[gid].dgst[i + 3] = dgst[ 3]; - tmps[gid].dgst[i + 4] = dgst[ 4]; - tmps[gid].dgst[i + 5] = dgst[ 5]; - tmps[gid].dgst[i + 6] = dgst[ 6]; - tmps[gid].dgst[i + 7] = dgst[ 7]; - tmps[gid].dgst[i + 8] = dgst[ 8]; - tmps[gid].dgst[i + 9] = dgst[ 9]; - tmps[gid].dgst[i + 10] = dgst[10]; - tmps[gid].dgst[i + 11] = dgst[11]; - tmps[gid].dgst[i + 12] = dgst[12]; - tmps[gid].dgst[i + 13] = dgst[13]; - tmps[gid].dgst[i + 14] = dgst[14]; - tmps[gid].dgst[i + 15] = dgst[15]; - - tmps[gid].out[i + 0] = out[ 0]; - tmps[gid].out[i + 1] = out[ 1]; - tmps[gid].out[i + 2] = out[ 2]; - tmps[gid].out[i + 3] = out[ 3]; - tmps[gid].out[i + 4] = out[ 4]; - tmps[gid].out[i + 5] = out[ 5]; - tmps[gid].out[i + 6] = out[ 6]; - tmps[gid].out[i + 7] = out[ 7]; - tmps[gid].out[i + 8] = out[ 8]; - tmps[gid].out[i + 9] = out[ 9]; - tmps[gid].out[i + 10] = out[10]; - tmps[gid].out[i + 11] = out[11]; - tmps[gid].out[i + 12] = out[12]; - tmps[gid].out[i + 13] = out[13]; - tmps[gid].out[i + 14] = out[14]; - tmps[gid].out[i + 15] = out[15]; - } -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m06233_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, tc_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const tc_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32 ukey1[8]; - - ukey1[0] = swap_workaround (tmps[gid].out[ 0]); - ukey1[1] = swap_workaround (tmps[gid].out[ 1]); - ukey1[2] = swap_workaround (tmps[gid].out[ 2]); - ukey1[3] = swap_workaround (tmps[gid].out[ 3]); - ukey1[4] = swap_workaround (tmps[gid].out[ 4]); - ukey1[5] = swap_workaround (tmps[gid].out[ 5]); - ukey1[6] = swap_workaround (tmps[gid].out[ 6]); - ukey1[7] = swap_workaround (tmps[gid].out[ 7]); - - u32 ukey2[8]; - - ukey2[0] = swap_workaround (tmps[gid].out[ 8]); - ukey2[1] = swap_workaround (tmps[gid].out[ 9]); - ukey2[2] = swap_workaround (tmps[gid].out[10]); - ukey2[3] = swap_workaround (tmps[gid].out[11]); - ukey2[4] = swap_workaround (tmps[gid].out[12]); - ukey2[5] = swap_workaround (tmps[gid].out[13]); - ukey2[6] = swap_workaround (tmps[gid].out[14]); - ukey2[7] = swap_workaround (tmps[gid].out[15]); - - u32 data[4]; - - data[0] = esalt_bufs[0].data_buf[0]; - data[1] = esalt_bufs[0].data_buf[1]; - data[2] = esalt_bufs[0].data_buf[2]; - data[3] = esalt_bufs[0].data_buf[3]; - - u32 tmp[4]; - - { - tmp[0] = data[0]; - 
tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey1, ukey2, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey3[8]; - - ukey3[0] = swap_workaround (tmps[gid].out[16]); - ukey3[1] = swap_workaround (tmps[gid].out[17]); - ukey3[2] = swap_workaround (tmps[gid].out[18]); - ukey3[3] = swap_workaround (tmps[gid].out[19]); - ukey3[4] = swap_workaround (tmps[gid].out[20]); - ukey3[5] = swap_workaround (tmps[gid].out[21]); - ukey3[6] = swap_workaround (tmps[gid].out[22]); - ukey3[7] = swap_workaround (tmps[gid].out[23]); - - u32 ukey4[8]; - - ukey4[0] = swap_workaround (tmps[gid].out[24]); - ukey4[1] = swap_workaround (tmps[gid].out[25]); - ukey4[2] = swap_workaround (tmps[gid].out[26]); - ukey4[3] = swap_workaround (tmps[gid].out[27]); - ukey4[4] = swap_workaround (tmps[gid].out[28]); - ukey4[5] = swap_workaround (tmps[gid].out[29]); - ukey4[6] = swap_workaround (tmps[gid].out[30]); - ukey4[7] = swap_workaround (tmps[gid].out[31]); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey2, ukey4, tmp, tmp); - twofish256_decrypt_xts (ukey1, ukey3, tmp, 
tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey2, ukey4, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - twofish256_decrypt_xts (ukey2, ukey4, tmp, tmp); - serpent256_decrypt_xts (ukey1, ukey3, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - u32 ukey5[8]; - - ukey5[0] = swap_workaround (tmps[gid].out[32]); - ukey5[1] = swap_workaround (tmps[gid].out[33]); - ukey5[2] = swap_workaround (tmps[gid].out[34]); - ukey5[3] = swap_workaround (tmps[gid].out[35]); - ukey5[4] = swap_workaround (tmps[gid].out[36]); - ukey5[5] = swap_workaround (tmps[gid].out[37]); - ukey5[6] = swap_workaround (tmps[gid].out[38]); - ukey5[7] = swap_workaround (tmps[gid].out[39]); - - u32 ukey6[8]; - - ukey6[0] = swap_workaround (tmps[gid].out[40]); - ukey6[1] = swap_workaround (tmps[gid].out[41]); - ukey6[2] = swap_workaround (tmps[gid].out[42]); - ukey6[3] = swap_workaround (tmps[gid].out[43]); - ukey6[4] = swap_workaround (tmps[gid].out[44]); - ukey6[5] = swap_workaround (tmps[gid].out[45]); - ukey6[6] = swap_workaround (tmps[gid].out[46]); - ukey6[7] = swap_workaround (tmps[gid].out[47]); - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - aes256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, tmp); 
- serpent256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - { - tmp[0] = data[0]; - tmp[1] = data[1]; - tmp[2] = data[2]; - tmp[3] = data[3]; - - serpent256_decrypt_xts (ukey3, ukey6, tmp, tmp); - twofish256_decrypt_xts (ukey2, ukey5, tmp, tmp); - aes256_decrypt_xts (ukey1, ukey4, tmp, tmp); - - if (((tmp[0] == 0x45555254) && (tmp[3] == 0)) || ((tmp[0] == 0x45555254) && ((tmp[1] >> 16) <= 5))) - { - mark_hash_s0 (plains_buf, hashes_shown, 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m06300.cu b/nv/m06300.cu deleted file mode 100644 index a8b79d6..0000000 --- a/nv/m06300.cu +++ /dev/null @@ -1,1081 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = 0; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, 
MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, 
MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void memcat16 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = 
__byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: 
block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat16_x80 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append[4]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0x80, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0x80; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; - block0[2] = tmp2; - block0[3] = tmp3; - block1[0] = tmp4; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - block1[0] = tmp3; - block1[1] = tmp4; - break; - case 2: block0[2] |= tmp0; - block0[3] 
= tmp1; - block1[0] = tmp2; - block1[1] = tmp3; - block1[2] = tmp4; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - block1[2] = tmp3; - block1[3] = tmp4; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - block1[3] = tmp3; - block2[0] = tmp4; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - block2[0] = tmp3; - block2[1] = tmp4; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - block2[1] = tmp3; - block2[2] = tmp4; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - block2[2] = tmp3; - block2[3] = tmp4; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - block2[3] = tmp3; - block3[0] = tmp4; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - block3[0] = tmp3; - block3[1] = tmp4; - break; - } - - return; -} - -__device__ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) -{ - u32x tmp0; - u32x tmp1; - u32x tmp2; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8; - break; - } - - #endif - - const u32 div = block_len / 4; - - switch (div) - { - case 0: block0[0] |= tmp0; - block0[1] = tmp1; 
- block0[2] = tmp2; - break; - case 1: block0[1] |= tmp0; - block0[2] = tmp1; - block0[3] = tmp2; - break; - case 2: block0[2] |= tmp0; - block0[3] = tmp1; - block1[0] = tmp2; - break; - case 3: block0[3] |= tmp0; - block1[0] = tmp1; - block1[1] = tmp2; - break; - case 4: block1[0] |= tmp0; - block1[1] = tmp1; - block1[2] = tmp2; - break; - case 5: block1[1] |= tmp0; - block1[2] = tmp1; - block1[3] = tmp2; - break; - case 6: block1[2] |= tmp0; - block1[3] = tmp1; - block2[0] = tmp2; - break; - case 7: block1[3] |= tmp0; - block2[0] = tmp1; - block2[1] = tmp2; - break; - case 8: block2[0] |= tmp0; - block2[1] = tmp1; - block2[2] = tmp2; - break; - case 9: block2[1] |= tmp0; - block2[2] = tmp1; - block2[3] = tmp2; - break; - case 10: block2[2] |= tmp0; - block2[3] = tmp1; - block3[0] = tmp2; - break; - case 11: block2[3] |= tmp0; - block3[0] = tmp1; - block3[1] = tmp2; - break; - } - - return; -} - -__device__ static void append_1st (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32x append) -{ - switch (block_len) - { - case 0: - block0[0] = append; - break; - - case 1: - block0[0] = block0[0] | append << 8; - break; - - case 2: - block0[0] = block0[0] | append << 16; - break; - - case 3: - block0[0] = block0[0] | append << 24; - break; - - case 4: - block0[1] = append; - break; - - case 5: - block0[1] = block0[1] | append << 8; - break; - - case 6: - block0[1] = block0[1] | append << 16; - break; - - case 7: - block0[1] = block0[1] | append << 24; - break; - - case 8: - block0[2] = append; - break; - - case 9: - block0[2] = block0[2] | append << 8; - break; - - case 10: - block0[2] = block0[2] | append << 16; - break; - - case 11: - block0[2] = block0[2] | append << 24; - break; - - case 12: - block0[3] = append; - break; - - case 13: - block0[3] = block0[3] | append << 8; - break; - - case 14: - block0[3] = block0[3] | append << 16; - break; - - case 15: - block0[3] = block0[3] | append << 24; - break; - - case 16: - 
block1[0] = append; - break; - - case 17: - block1[0] = block1[0] | append << 8; - break; - - case 18: - block1[0] = block1[0] | append << 16; - break; - - case 19: - block1[0] = block1[0] | append << 24; - break; - - case 20: - block1[1] = append; - break; - - case 21: - block1[1] = block1[1] | append << 8; - break; - - case 22: - block1[1] = block1[1] | append << 16; - break; - - case 23: - block1[1] = block1[1] | append << 24; - break; - - case 24: - block1[2] = append; - break; - - case 25: - block1[2] = block1[2] | append << 8; - break; - - case 26: - block1[2] = block1[2] | append << 16; - break; - - case 27: - block1[2] = block1[2] | append << 24; - break; - - case 28: - block1[3] = append; - break; - - case 29: - block1[3] = block1[3] | append << 8; - break; - - case 30: - block1[3] = block1[3] | append << 16; - break; - - case 31: - block1[3] = block1[3] | append << 24; - break; - - case 32: - block2[0] = append; - break; - - case 33: - block2[0] = block2[0] | append << 8; - break; - - case 34: - block2[0] = block2[0] | append << 16; - break; - - case 35: - block2[0] = block2[0] | append << 24; - break; - - case 36: - block2[1] = append; - break; - - case 37: - block2[1] = block2[1] | append << 8; - break; - - case 38: - block2[1] = block2[1] | append << 16; - break; - - case 39: - block2[1] = block2[1] | append << 24; - break; - - case 40: - block2[2] = append; - break; - - case 41: - block2[2] = block2[2] | append << 8; - break; - - case 42: - block2[2] = block2[2] | append << 16; - break; - - case 43: - block2[2] = block2[2] | append << 24; - break; - - case 44: - block2[3] = append; - break; - - case 45: - block2[3] = block2[3] | append << 8; - break; - - case 46: - block2[3] = block2[3] | append << 16; - break; - - case 47: - block2[3] = block2[3] | append << 24; - break; - - case 48: - block3[0] = append; - break; - - case 49: - block3[0] = block3[0] | append << 8; - break; - - case 50: - block3[0] = block3[0] | append << 16; - break; - - case 51: - 
block3[0] = block3[0] | append << 24; - break; - - case 52: - block3[1] = append; - break; - - case 53: - block3[1] = block3[1] | append << 8; - break; - - case 54: - block3[1] = block3[1] | append << 16; - break; - - case 55: - block3[1] = block3[1] | append << 24; - break; - - case 56: - block3[2] = append; - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06300_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * init - */ - - //memcat16 (block0, block1, block2, block3, block_len, w0); - //block_len += pw_len; - - u32 block_len = pw_len; - - u32x block0[4]; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - 
block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - /* The password first, since that is what is most unknown */ - /* Then the raw salt */ - /* Then just as many characters of the MD5(pw,salt,pw) */ - - //memcat16 (block0, block1, block2, block3, block_len, w); - //block_len += pw_len; - - block_len = pw_len; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - truncate_block (digest, pw_len); - - memcat16 (block0, block1, block2, block3, block_len, digest); - - block_len += pw_len; - - /* Then something really weird... 
*/ - - u32x append = block0[0] & 0xFF; - - for (u32 j = pw_len; j; j >>= 1) - { - if ((j & 1) == 0) - { - append_1st (block0, block1, block2, block3, block_len, append); - } - - block_len++; - } - - append_0x80_4 (block0, block1, block2, block3, block_len); - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06300_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - u32x w0_x80[4]; - - w0_x80[0] = w0[0]; - w0_x80[1] = w0[1]; - w0_x80[2] = w0[2]; - w0_x80[3] = w0[3]; - - append_0x80_1 (w0_x80, pw_len); - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const 
u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - u32x digest[4]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - - /** - * loop - */ - - /* and now, just to make sure things don't run too fast */ - - u32 block_len; - - u32x block0[4]; - - block0[0] = 0; - block0[1] = 0; - block0[2] = 0; - block0[3] = 0; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - block3[0] = 0; - block3[1] = 0; - - const u32 j1 = (j & 1) ? 1 : 0; - const u32 j3 = (j % 3) ? 1 : 0; - const u32 j7 = (j % 7) ? 
1 : 0; - - if (j1) - { - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - block_len = pw_len; - - if (j3) - { - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - } - - if (j7) - { - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - - memcat16_x80 (block0, block1, block2, block3, block_len, digest); - - block_len += 16; - } - else - { - block0[0] = digest[0]; - block0[1] = digest[1]; - block0[2] = digest[2]; - block0[3] = digest[3]; - - block_len = 16; - - if (j3 && j7) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - - memcat16 (block0, block1, block2, block3, block_len, w0); - - block_len += pw_len; - } - else if (j3) - { - block1[0] = salt_buf[0]; - block1[1] = salt_buf[1]; - - block_len += salt_len; - } - else if (j7) - { - block1[0] = w0[0]; - block1[1] = w0[1]; - block1[2] = w0[2]; - block1[3] = w0[3]; - - block_len += pw_len; - } - - memcat16 (block0, block1, block2, block3, block_len, w0_x80); - - block_len += pw_len; - } - - block3[2] = block_len * 8; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (block0, block1, block2, block3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06300_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, md5crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[DGST_R0]; - const u32x r1 = tmps[gid].digest_buf[DGST_R1]; - const u32x r2 = tmps[gid].digest_buf[DGST_R2]; - const u32x r3 = tmps[gid].digest_buf[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06400.cu b/nv/m06400.cu deleted file mode 100644 index 5c8224a..0000000 --- a/nv/m06400.cu +++ /dev/null @@ -1,555 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; 
- u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, 
SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + 
wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, 
d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] 
+= c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, 
w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha256aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt 
- */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - append_0x01_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 3); - - append_0x80_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 4); - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] 
= opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = salt_buf2[0]; - w2[1] = salt_buf2[1]; - w2[2] = salt_buf2[2]; - w2[3] = salt_buf2[3]; - w3[0] = salt_buf3[0]; - w3[1] = salt_buf3[1]; - w3[2] = salt_buf3[2]; - // w3[3] = 0; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[8]; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; - tmps[gid].out[5] = dgst[5]; - tmps[gid].out[6] = dgst[6]; - tmps[gid].out[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06400_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha256aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[8]; - u32x opad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - u32x dgst[8]; - u32x out[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - out[5] = tmps[gid].out[5]; - out[6] = tmps[gid].out[6]; - out[7] = tmps[gid].out[7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] 
= 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; - tmps[gid].out[5] = out[5]; - tmps[gid].out[6] = out[6]; - tmps[gid].out[7] = out[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha256aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - /* - u32x a = tmps[gid].out[0]; - u32x b = tmps[gid].out[1]; - u32x c = 
tmps[gid].out[2]; - u32x d = tmps[gid].out[3]; - u32x e = tmps[gid].out[4]; - u32x f = tmps[gid].out[5]; - u32x g = tmps[gid].out[6]; - u32x h = tmps[gid].out[7] & 0xffff03ff; - */ - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06500.cu b/nv/m06500.cu deleted file mode 100644 index ed429d4..0000000 --- a/nv/m06500.cu +++ /dev/null @@ -1,583 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = 
SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, 
a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64x w0[4], const u64x w1[4], const u64x w2[4], const u64x w3[4], u64x digest[8]) -{ - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = w0[0]; - u64x w1_t = w0[1]; - u64x w2_t = w0[2]; - u64x w3_t = w0[3]; - u64x w4_t = w1[0]; - u64x w5_t = w1[1]; - u64x w6_t = w1[2]; - u64x w7_t = w1[3]; - u64x w8_t = w2[0]; - u64x w9_t = w2[1]; - u64x wa_t = w2[2]; - u64x wb_t = w2[3]; - u64x wc_t = w3[0]; - u64x wd_t = w3[1]; - u64x we_t = w3[2]; - u64x wf_t = w3[3]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha512_pad (u64x w0[4], u64x w1[4], u64x w2[4], u64x w3[4], u64x ipad[8], u64x opad[8]) -{ - w0[0] = w0[0] ^ 0x3636363636363636; - w0[1] = w0[1] ^ 0x3636363636363636; - w0[2] = w0[2] ^ 0x3636363636363636; - w0[3] = w0[3] ^ 0x3636363636363636; - w1[0] = w1[0] ^ 0x3636363636363636; - w1[1] = w1[1] ^ 0x3636363636363636; - w1[2] = w1[2] ^ 0x3636363636363636; - w1[3] = w1[3] ^ 0x3636363636363636; - w2[0] = w2[0] ^ 0x3636363636363636; - w2[1] = w2[1] ^ 0x3636363636363636; - w2[2] = w2[2] ^ 0x3636363636363636; - w2[3] = w2[3] ^ 0x3636363636363636; - w3[0] = w3[0] ^ 0x3636363636363636; - w3[1] = w3[1] ^ 0x3636363636363636; - w3[2] = w3[2] ^ 0x3636363636363636; - w3[3] = w3[3] ^ 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 
0x6a6a6a6a6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha512_run (u64x w0[4], u64x w1[4], u64x w2[4], u64x w3[4], u64x ipad[8], u64x opad[8], u64x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha512_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x8000000000000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (128 + 64) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha512_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha512aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - 
salt_buf3[2] = 0; - salt_buf3[3] = 0; - - append_0x01_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 3); - - append_0x80_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 4); - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u64x w0l[4]; - u64x w1l[4]; - u64x w2l[4]; - u64x w3l[4]; - - w0l[0] = (u64x) (w0[0]) << 32 | (u64x) (w0[1]); - w0l[1] = (u64x) (w0[2]) << 32 | (u64x) (w0[3]); - w0l[2] = (u64x) (w1[0]) << 32 | (u64x) (w1[1]); - w0l[3] = (u64x) (w1[2]) << 32 | (u64x) (w1[3]); - w1l[0] = (u64x) (w2[0]) << 32 | (u64x) (w2[1]); - w1l[1] = (u64x) (w2[2]) << 32 | (u64x) (w2[3]); - w1l[2] = (u64x) (w3[0]) << 32 | (u64x) (w3[1]); - w1l[3] = (u64x) (w3[2]) << 32 | (u64x) (w3[3]); - w2l[0] = 0; - w2l[1] = 0; - w2l[2] = 0; - w2l[3] = 0; - w3l[0] = 0; - w3l[1] = 0; - w3l[2] = 0; - w3l[3] = 0; - - u64x ipad[8]; - u64x opad[8]; - - hmac_sha512_pad (w0l, w1l, w2l, w3l, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - w0l[0] = (u64x) salt_buf0[1] << 32 | (u64x) 
salt_buf0[0]; - w0l[1] = (u64x) salt_buf0[3] << 32 | (u64x) salt_buf0[2]; - w0l[2] = (u64x) salt_buf1[1] << 32 | (u64x) salt_buf1[0]; - w0l[3] = (u64x) salt_buf1[3] << 32 | (u64x) salt_buf1[2]; - w1l[0] = (u64x) salt_buf2[1] << 32 | (u64x) salt_buf2[0]; - w1l[1] = (u64x) salt_buf2[3] << 32 | (u64x) salt_buf2[2]; - w1l[2] = (u64x) salt_buf3[1] << 32 | (u64x) salt_buf3[0]; - w1l[3] = (u64x) salt_buf3[3] << 32 | (u64x) salt_buf3[2]; - w2l[0] = 0; - w2l[1] = 0; - w2l[2] = 0; - w2l[3] = 0; - w3l[0] = 0; - w3l[1] = 0; - w3l[2] = 0; - w3l[3] = 0; - - w0l[0] = swap_workaround (w0l[0]); - w0l[1] = swap_workaround (w0l[1]); - w0l[2] = swap_workaround (w0l[2]); - w0l[3] = swap_workaround (w0l[3]); - w1l[0] = swap_workaround (w1l[0]); - w1l[1] = swap_workaround (w1l[1]); - w1l[2] = swap_workaround (w1l[2]); - w1l[3] = swap_workaround (w1l[3]); - w2l[0] = 0; - w2l[1] = 0; - w2l[2] = 0; - w2l[3] = 0; - w3l[0] = 0; - w3l[1] = 0; - w3l[2] = 0; - w3l[3] = (128 + salt_len + 4) * 8; - - u64x dgst[8]; - - hmac_sha512_run (w0l, w1l, w2l, w3l, ipad, opad, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; - tmps[gid].out[5] = dgst[5]; - tmps[gid].out[6] = dgst[6]; - tmps[gid].out[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha512aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64x ipad[8]; - u64x opad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - u64x dgst[8]; - u64x out[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - out[5] = tmps[gid].out[5]; - out[6] = tmps[gid].out[6]; - out[7] = tmps[gid].out[7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64x w0[4]; - u64x w1[4]; - u64x w2[4]; - u64x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x8000000000000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (128 + 64) * 8; - - hmac_sha512_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= 
dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; - tmps[gid].out[5] = out[5]; - tmps[gid].out[6] = out[6]; - tmps[gid].out[7] = out[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha512aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - /* - u64x a = tmps[gid].out[0]; - u64x b = tmps[gid].out[1]; - u64x c = tmps[gid].out[2]; - u64x d = tmps[gid].out[3]; - u64x e = tmps[gid].out[4]; - u64x f = tmps[gid].out[5]; - u64x g = tmps[gid].out[6]; - u64x h = tmps[gid].out[7] & 0xffffffffffffff00; - */ - - const u32x r0 = l32_from_64 (tmps[gid].out[0]); - const u32x r1 = h32_from_64 
(tmps[gid].out[0]); - const u32x r2 = l32_from_64 (tmps[gid].out[1]); - const u32x r3 = h32_from_64 (tmps[gid].out[1]); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06600.cu b/nv/m06600.cu deleted file mode 100644 index cd606ac..0000000 --- a/nv/m06600.cu +++ /dev/null @@ -1,1414 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 
0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 
0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 
0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 
0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 
0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 
0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 
0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 
0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 
0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 
0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 
0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 
0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 
0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 
0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 
0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 
0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - - #pragma unroll 10 - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ 
(s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 10; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 
s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ 
s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 
8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[40]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[41]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[42]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[43]; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, 
E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, 
w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, 
B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, 
we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 
0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06600_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, agilekey_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = 8; - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = 
swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - w0[0] = salt_buf[0]; - w0[1] = salt_buf[1]; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - append_0x01_1 (w0, salt_len + 3); - append_0x80_1 (w0, salt_len + 4); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06600_loop (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, agilekey_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= 
dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06600_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, agilekey_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 iv[4] = - { - salt_bufs[salt_pos].salt_buf[ 4], - salt_bufs[salt_pos].salt_buf[ 5], - salt_bufs[salt_pos].salt_buf[ 6], - salt_bufs[salt_pos].salt_buf[ 7] - }; - - const u32 data[4] = - { - salt_bufs[salt_pos].salt_buf[ 8], - salt_bufs[salt_pos].salt_buf[ 9], - salt_bufs[salt_pos].salt_buf[10], - salt_bufs[salt_pos].salt_buf[11] - }; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 
s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * aes init - */ - - u32x ukey[4]; - - ukey[0] = tmps[gid].out[0]; - ukey[1] = tmps[gid].out[1]; - ukey[2] = tmps[gid].out[2]; - ukey[3] = tmps[gid].out[3]; - - u32x a; - u32x b; - u32x c; - u32x d; - - #define KEYLEN 44 - - u32 rek[KEYLEN]; - u32 rdk[KEYLEN]; - - u32 out[4]; - - /** - * aes decrypt key - */ - - AES128_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - #pragma unroll 44 - for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - a = out[0] ^ iv[0]; - b = out[1] ^ iv[1]; - c = out[2] ^ iv[2]; - d = out[3] ^ iv[3]; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06700.cu b/nv/m06700.cu deleted file mode 100644 index 26dc67f..0000000 --- a/nv/m06700.cu +++ /dev/null @@ -1,532 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], 
u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, 
w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); 
- wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, 
w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 
0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06700_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha1aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - append_0x01_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 3); - - append_0x80_4 (salt_buf0, salt_buf1, salt_buf2, salt_buf3, salt_len + 4); - - /** - * 
pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = salt_buf2[0]; - w2[1] = salt_buf2[1]; - w2[2] = salt_buf2[2]; - w2[3] = salt_buf2[3]; - w3[0] = salt_buf3[0]; - w3[1] = salt_buf3[1]; - w3[2] = salt_buf3[2]; - //w3[3] = salt_buf3[3]; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - 
tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06700_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha1aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x 
w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06700_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha1aix_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - /* - u32x a = tmps[gid].out[0]; - u32x b = tmps[gid].out[1]; - u32x c = tmps[gid].out[2]; - u32x d = tmps[gid].out[3]; - u32x e = tmps[gid].out[4] & 0xffff03ff; - */ - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = 
tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06800.cu b/nv/m06800.cu deleted file mode 100644 index d9478c8..0000000 --- a/nv/m06800.cu +++ /dev/null @@ -1,1603 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 
0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 
0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 
0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 
0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 
0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 
0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 
0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 
0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 
0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 
0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 
0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 
0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 
0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 
0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 
0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 
0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - u32 run = 1; - - while (run) - { - u32 temp = rek[j + 7]; - - rek[j + 
8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) - { - run = 0; - continue; - } - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] 
>> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ 
s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 
16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] 
^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 
0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ 
s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 
16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[56]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[57]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[58]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[59]; -} - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x 
w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = 
SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, 
SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + 
wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, 
f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = 
ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lastpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = 
pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - u32 salt_buf1[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[8]; - u32x opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - 
w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - append_0x01_3 (w0, w1, w2, salt_len + 3); - append_0x80_3 (w0, w1, w2, salt_len + 4); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[8]; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; - tmps[gid].out[5] = dgst[5]; - tmps[gid].out[6] = dgst[6]; - tmps[gid].out[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lastpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - 
const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[8]; - u32x opad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - u32x dgst[8]; - u32x out[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - out[5] = tmps[gid].out[5]; - out[6] = tmps[gid].out[6]; - out[7] = tmps[gid].out[7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - 
- tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; - tmps[gid].out[5] = out[5]; - tmps[gid].out[6] = out[6]; - tmps[gid].out[7] = out[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lastpass_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 data[4] = - { - digests_buf[digests_offset].digest_buf[0], - digests_buf[digests_offset].digest_buf[1], - digests_buf[digests_offset].digest_buf[2], - digests_buf[digests_offset].digest_buf[3], - }; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - 
__syncthreads (); - - if (gid >= gid_max) return; - - /** - * aes init - */ - - u32 ukey[8]; - - ukey[0] = tmps[gid].out[0]; - ukey[1] = tmps[gid].out[1]; - ukey[2] = tmps[gid].out[2]; - ukey[3] = tmps[gid].out[3]; - ukey[4] = tmps[gid].out[4]; - ukey[5] = tmps[gid].out[5]; - ukey[6] = tmps[gid].out[6]; - ukey[7] = tmps[gid].out[7]; - - #define KEYLEN 60 - - u32 rek[KEYLEN]; - - AES256_ExpandKey (ukey, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - /** - * sniffed mode - */ - - u32 rdk[KEYLEN]; - - #pragma unroll 60 - for (u32 i = 0; i < KEYLEN; i++) rdk[i] = rek[i]; - - AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); - - truncate_block (out, salt_len); - - if ((out[0] == salt_buf[0]) - && (out[1] == salt_buf[1]) - && (out[2] == salt_buf[2]) - && (out[3] == salt_buf[3])) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); - - d_return_buf[lid] = 1; - } - - /** - * offline mode - */ - - const u32 lastpass_magic[4] = - { - 0x6c617374, - 0x70617373, - 0x20726f63, - 0x6b730202, - }; - - AES256_encrypt (lastpass_magic, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - const u32x r0 = out[DGST_R0]; - const u32x r1 = out[DGST_R1]; - const u32x r2 = out[DGST_R2]; - const u32x r3 = out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m06900_a0.cu b/nv/m06900_a0.cu deleted file mode 100644 index ccb2586..0000000 --- a/nv/m06900_a0.cu +++ /dev/null @@ -1,1215 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -#define round(k1,k2,tbl) \ -{ \ - u32x t; \ - t = (k1) + r; \ - l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ - t = (k2) + l; \ - r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ -} - -#define R(k,h,s,i,t) \ -{ \ - u32x r; \ - u32x l; \ - r = h[i + 0]; \ - l = h[i + 1]; \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[7], k[6], t); \ - round (k[5], k[4], t); \ - round (k[3], k[2], t); \ - round (k[1], k[0], t); \ - s[i + 0] = l; \ - s[i + 1] = r; \ -} - -#define X(w,u,v) \ - w[0] = u[0] ^ v[0]; \ - w[1] = u[1] ^ v[1]; \ - w[2] = u[2] ^ v[2]; \ - w[3] = u[3] ^ v[3]; \ - w[4] = u[4] ^ v[4]; \ - w[5] = u[5] ^ v[5]; \ - w[6] = u[6] 
^ v[6]; \ - w[7] = u[7] ^ v[7]; - -#define P(k,w) \ - k[0] = ((w[0] & 0x000000ff) << 0) \ - | ((w[2] & 0x000000ff) << 8) \ - | ((w[4] & 0x000000ff) << 16) \ - | ((w[6] & 0x000000ff) << 24); \ - k[1] = ((w[0] & 0x0000ff00) >> 8) \ - | ((w[2] & 0x0000ff00) >> 0) \ - | ((w[4] & 0x0000ff00) << 8) \ - | ((w[6] & 0x0000ff00) << 16); \ - k[2] = ((w[0] & 0x00ff0000) >> 16) \ - | ((w[2] & 0x00ff0000) >> 8) \ - | ((w[4] & 0x00ff0000) << 0) \ - | ((w[6] & 0x00ff0000) << 8); \ - k[3] = ((w[0] & 0xff000000) >> 24) \ - | ((w[2] & 0xff000000) >> 16) \ - | ((w[4] & 0xff000000) >> 8) \ - | ((w[6] & 0xff000000) >> 0); \ - k[4] = ((w[1] & 0x000000ff) << 0) \ - | ((w[3] & 0x000000ff) << 8) \ - | ((w[5] & 0x000000ff) << 16) \ - | ((w[7] & 0x000000ff) << 24); \ - k[5] = ((w[1] & 0x0000ff00) >> 8) \ - | ((w[3] & 0x0000ff00) >> 0) \ - | ((w[5] & 0x0000ff00) << 8) \ - | ((w[7] & 0x0000ff00) << 16); \ - k[6] = ((w[1] & 0x00ff0000) >> 16) \ - | ((w[3] & 0x00ff0000) >> 8) \ - | ((w[5] & 0x00ff0000) << 0) \ - | ((w[7] & 0x00ff0000) << 8); \ - k[7] = ((w[1] & 0xff000000) >> 24) \ - | ((w[3] & 0xff000000) >> 16) \ - | ((w[5] & 0xff000000) >> 8) \ - | ((w[7] & 0xff000000) >> 0); - -#define A(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0] ^ x[2]; \ - r = x[1] ^ x[3]; \ - x[0] = x[2]; \ - x[1] = x[3]; \ - x[2] = x[4]; \ - x[3] = x[5]; \ - x[4] = x[6]; \ - x[5] = x[7]; \ - x[6] = l; \ - x[7] = r; \ -} - -#define AA(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0]; \ - r = x[2]; \ - x[0] = x[4]; \ - x[2] = x[6]; \ - x[4] = l ^ r; \ - x[6] = x[0] ^ r; \ - l = x[1]; \ - r = x[3]; \ - x[1] = x[5]; \ - x[3] = x[7]; \ - x[5] = l ^ r; \ - x[7] = x[1] ^ r; \ -} - -#define C(x) \ - x[0] ^= 0xff00ff00; \ - x[1] ^= 0xff00ff00; \ - x[2] ^= 0x00ff00ff; \ - x[3] ^= 0x00ff00ff; \ - x[4] ^= 0x00ffff00; \ - x[5] ^= 0xff0000ff; \ - x[6] ^= 0x000000ff; \ - x[7] ^= 0xff00ffff; - -#define SHIFT12(u,m,s) \ - u[0] = m[0] ^ s[6]; \ - u[1] = m[1] ^ s[7]; \ - u[2] = m[2] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0x0000ffff) 
\ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] >> 16); \ - u[3] = m[3] ^ (s[0] & 0x0000ffff) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[4] = m[4] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[1] & 0xffff0000) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[5] = m[5] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0xffff0000) \ - ^ (s[1] & 0x0000ffff) \ - ^ s[2] \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[6] = m[6] ^ s[0] \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[3] \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] << 16); \ - u[7] = m[7] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[4] \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); - -#define SHIFT16(h,v,u) \ - v[0] = h[0] ^ (u[1] << 16) \ - ^ (u[0] >> 16); \ - v[1] = h[1] ^ (u[2] << 16) \ - ^ (u[1] >> 16); \ - v[2] = h[2] ^ (u[3] << 16) \ - ^ (u[2] >> 16); \ - v[3] = h[3] ^ (u[4] << 16) \ - ^ (u[3] >> 16); \ - v[4] = h[4] ^ (u[5] << 16) \ - ^ (u[4] >> 16); \ - v[5] = h[5] ^ (u[6] << 16) \ - ^ (u[5] >> 16); \ - v[6] = h[6] ^ (u[7] << 16) \ - ^ (u[6] >> 16); \ - 
v[7] = h[7] ^ (u[0] & 0xffff0000) \ - ^ (u[0] << 16) \ - ^ (u[7] >> 16) \ - ^ (u[1] & 0xffff0000) \ - ^ (u[1] << 16) \ - ^ (u[6] << 16) \ - ^ (u[7] & 0xffff0000); - -#define SHIFT61(h,v) \ - h[0] = (v[0] & 0xffff0000) \ - ^ (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0x0000ffff); \ - h[1] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0x0000ffff) \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ v[6] \ - ^ (v[7] & 0xffff0000) \ - ^ (v[7] >> 16); \ - h[2] = (v[0] & 0x0000ffff) \ - ^ (v[0] << 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[6] \ - ^ (v[6] >> 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16); \ - h[3] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] >> 16); \ - h[4] = (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ v[1] \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16); \ - h[5] = (v[0] << 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ v[2] \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0xffff0000); \ - h[6] = v[0] \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ v[3] \ - ^ (v[3] << 16) \ - ^ 
v[4] \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ v[7]; \ - h[7] = v[0] \ - ^ (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ v[4] \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16) \ - ^ v[7]; - -#define PASS0(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 0, t); \ - A (u); \ - AA (v); \ -} - -#define PASS2(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 2, t); \ - A (u); \ - C (u); \ - AA (v); \ -} - -#define PASS4(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 4, t); \ - A (u); \ - AA (v); \ -} - -#define PASS6(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 6, t); \ -} - -__device__ __constant__ u32 c_tables[4][256] = -{ - { - 0x00072000, 0x00075000, 0x00074800, 0x00071000, - 0x00076800, 0x00074000, 0x00070000, 0x00077000, - 0x00073000, 0x00075800, 0x00070800, 0x00076000, - 0x00073800, 0x00077800, 0x00072800, 0x00071800, - 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000, - 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000, - 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000, - 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800, - 0x00022000, 0x00025000, 0x00024800, 0x00021000, - 0x00026800, 0x00024000, 0x00020000, 0x00027000, - 0x00023000, 0x00025800, 0x00020800, 0x00026000, - 0x00023800, 0x00027800, 0x00022800, 0x00021800, - 0x00062000, 0x00065000, 0x00064800, 0x00061000, - 0x00066800, 0x00064000, 0x00060000, 0x00067000, - 0x00063000, 0x00065800, 0x00060800, 0x00066000, - 0x00063800, 0x00067800, 0x00062800, 0x00061800, - 0x00032000, 0x00035000, 0x00034800, 0x00031000, - 0x00036800, 0x00034000, 0x00030000, 0x00037000, - 0x00033000, 0x00035800, 0x00030800, 0x00036000, - 0x00033800, 
0x00037800, 0x00032800, 0x00031800, - 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000, - 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000, - 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000, - 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800, - 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000, - 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000, - 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000, - 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800, - 0x00052000, 0x00055000, 0x00054800, 0x00051000, - 0x00056800, 0x00054000, 0x00050000, 0x00057000, - 0x00053000, 0x00055800, 0x00050800, 0x00056000, - 0x00053800, 0x00057800, 0x00052800, 0x00051800, - 0x00012000, 0x00015000, 0x00014800, 0x00011000, - 0x00016800, 0x00014000, 0x00010000, 0x00017000, - 0x00013000, 0x00015800, 0x00010800, 0x00016000, - 0x00013800, 0x00017800, 0x00012800, 0x00011800, - 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000, - 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000, - 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000, - 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800, - 0x00042000, 0x00045000, 0x00044800, 0x00041000, - 0x00046800, 0x00044000, 0x00040000, 0x00047000, - 0x00043000, 0x00045800, 0x00040800, 0x00046000, - 0x00043800, 0x00047800, 0x00042800, 0x00041800, - 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000, - 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000, - 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000, - 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800, - 0x00002000, 0x00005000, 0x00004800, 0x00001000, - 0x00006800, 0x00004000, 0x00000000, 0x00007000, - 0x00003000, 0x00005800, 0x00000800, 0x00006000, - 0x00003800, 0x00007800, 0x00002800, 0x00001800, - 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000, - 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000, - 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000, - 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800, - 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000, - 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000, - 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000, - 0x0002b800, 
0x0002f800, 0x0002a800, 0x00029800, - 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000, - 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000, - 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000, - 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800, - }, - { - 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000, - 0x03d00000, 0x03980000, 0x03a00000, 0x03900000, - 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000, - 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000, - 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000, - 0x06d00000, 0x06980000, 0x06a00000, 0x06900000, - 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000, - 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000, - 0x05280000, 0x05400000, 0x05080000, 0x05680000, - 0x05500000, 0x05180000, 0x05200000, 0x05100000, - 0x05700000, 0x05780000, 0x05600000, 0x05380000, - 0x05300000, 0x05000000, 0x05480000, 0x05580000, - 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000, - 0x00d00000, 0x00980000, 0x00a00000, 0x00900000, - 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000, - 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000, - 0x00280000, 0x00400000, 0x00080000, 0x00680000, - 0x00500000, 0x00180000, 0x00200000, 0x00100000, - 0x00700000, 0x00780000, 0x00600000, 0x00380000, - 0x00300000, 0x00000000, 0x00480000, 0x00580000, - 0x04280000, 0x04400000, 0x04080000, 0x04680000, - 0x04500000, 0x04180000, 0x04200000, 0x04100000, - 0x04700000, 0x04780000, 0x04600000, 0x04380000, - 0x04300000, 0x04000000, 0x04480000, 0x04580000, - 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000, - 0x04d00000, 0x04980000, 0x04a00000, 0x04900000, - 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000, - 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000, - 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000, - 0x07d00000, 0x07980000, 0x07a00000, 0x07900000, - 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000, - 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000, - 0x07280000, 0x07400000, 0x07080000, 0x07680000, - 0x07500000, 0x07180000, 0x07200000, 0x07100000, - 0x07700000, 0x07780000, 0x07600000, 0x07380000, - 
0x07300000, 0x07000000, 0x07480000, 0x07580000, - 0x02280000, 0x02400000, 0x02080000, 0x02680000, - 0x02500000, 0x02180000, 0x02200000, 0x02100000, - 0x02700000, 0x02780000, 0x02600000, 0x02380000, - 0x02300000, 0x02000000, 0x02480000, 0x02580000, - 0x03280000, 0x03400000, 0x03080000, 0x03680000, - 0x03500000, 0x03180000, 0x03200000, 0x03100000, - 0x03700000, 0x03780000, 0x03600000, 0x03380000, - 0x03300000, 0x03000000, 0x03480000, 0x03580000, - 0x06280000, 0x06400000, 0x06080000, 0x06680000, - 0x06500000, 0x06180000, 0x06200000, 0x06100000, - 0x06700000, 0x06780000, 0x06600000, 0x06380000, - 0x06300000, 0x06000000, 0x06480000, 0x06580000, - 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000, - 0x05d00000, 0x05980000, 0x05a00000, 0x05900000, - 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000, - 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000, - 0x01280000, 0x01400000, 0x01080000, 0x01680000, - 0x01500000, 0x01180000, 0x01200000, 0x01100000, - 0x01700000, 0x01780000, 0x01600000, 0x01380000, - 0x01300000, 0x01000000, 0x01480000, 0x01580000, - 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000, - 0x02d00000, 0x02980000, 0x02a00000, 0x02900000, - 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000, - 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000, - 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000, - 0x01d00000, 0x01980000, 0x01a00000, 0x01900000, - 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000, - 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000, - }, - { - 0x30000002, 0x60000002, 0x38000002, 0x08000002, - 0x28000002, 0x78000002, 0x68000002, 0x40000002, - 0x20000002, 0x50000002, 0x48000002, 0x70000002, - 0x00000002, 0x18000002, 0x58000002, 0x10000002, - 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005, - 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005, - 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005, - 0x80000005, 0x98000005, 0xd8000005, 0x90000005, - 0x30000005, 0x60000005, 0x38000005, 0x08000005, - 0x28000005, 0x78000005, 0x68000005, 0x40000005, - 0x20000005, 0x50000005, 0x48000005, 
0x70000005, - 0x00000005, 0x18000005, 0x58000005, 0x10000005, - 0x30000000, 0x60000000, 0x38000000, 0x08000000, - 0x28000000, 0x78000000, 0x68000000, 0x40000000, - 0x20000000, 0x50000000, 0x48000000, 0x70000000, - 0x00000000, 0x18000000, 0x58000000, 0x10000000, - 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003, - 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003, - 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003, - 0x80000003, 0x98000003, 0xd8000003, 0x90000003, - 0x30000001, 0x60000001, 0x38000001, 0x08000001, - 0x28000001, 0x78000001, 0x68000001, 0x40000001, - 0x20000001, 0x50000001, 0x48000001, 0x70000001, - 0x00000001, 0x18000001, 0x58000001, 0x10000001, - 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000, - 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000, - 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000, - 0x80000000, 0x98000000, 0xd8000000, 0x90000000, - 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006, - 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006, - 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006, - 0x80000006, 0x98000006, 0xd8000006, 0x90000006, - 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001, - 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001, - 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001, - 0x80000001, 0x98000001, 0xd8000001, 0x90000001, - 0x30000003, 0x60000003, 0x38000003, 0x08000003, - 0x28000003, 0x78000003, 0x68000003, 0x40000003, - 0x20000003, 0x50000003, 0x48000003, 0x70000003, - 0x00000003, 0x18000003, 0x58000003, 0x10000003, - 0x30000004, 0x60000004, 0x38000004, 0x08000004, - 0x28000004, 0x78000004, 0x68000004, 0x40000004, - 0x20000004, 0x50000004, 0x48000004, 0x70000004, - 0x00000004, 0x18000004, 0x58000004, 0x10000004, - 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002, - 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002, - 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002, - 0x80000002, 0x98000002, 0xd8000002, 0x90000002, - 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004, - 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004, - 0xa0000004, 0xd0000004, 0xc8000004, 
0xf0000004, - 0x80000004, 0x98000004, 0xd8000004, 0x90000004, - 0x30000006, 0x60000006, 0x38000006, 0x08000006, - 0x28000006, 0x78000006, 0x68000006, 0x40000006, - 0x20000006, 0x50000006, 0x48000006, 0x70000006, - 0x00000006, 0x18000006, 0x58000006, 0x10000006, - 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007, - 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007, - 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007, - 0x80000007, 0x98000007, 0xd8000007, 0x90000007, - 0x30000007, 0x60000007, 0x38000007, 0x08000007, - 0x28000007, 0x78000007, 0x68000007, 0x40000007, - 0x20000007, 0x50000007, 0x48000007, 0x70000007, - 0x00000007, 0x18000007, 0x58000007, 0x10000007, - }, - { - 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088, - 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8, - 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8, - 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0, - 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788, - 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8, - 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8, - 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0, - 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688, - 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8, - 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8, - 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0, - 0x00000068, 0x00000058, 0x00000020, 0x00000008, - 0x00000018, 0x00000078, 0x00000028, 0x00000048, - 0x00000000, 0x00000050, 0x00000070, 0x00000038, - 0x00000030, 0x00000040, 0x00000010, 0x00000060, - 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288, - 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8, - 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8, - 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0, - 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388, - 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8, - 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8, - 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0, - 0x00000568, 0x00000558, 0x00000520, 0x00000508, - 0x00000518, 0x00000578, 0x00000528, 0x00000548, - 0x00000500, 0x00000550, 
0x00000570, 0x00000538, - 0x00000530, 0x00000540, 0x00000510, 0x00000560, - 0x00000268, 0x00000258, 0x00000220, 0x00000208, - 0x00000218, 0x00000278, 0x00000228, 0x00000248, - 0x00000200, 0x00000250, 0x00000270, 0x00000238, - 0x00000230, 0x00000240, 0x00000210, 0x00000260, - 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488, - 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8, - 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8, - 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0, - 0x00000168, 0x00000158, 0x00000120, 0x00000108, - 0x00000118, 0x00000178, 0x00000128, 0x00000148, - 0x00000100, 0x00000150, 0x00000170, 0x00000138, - 0x00000130, 0x00000140, 0x00000110, 0x00000160, - 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188, - 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8, - 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8, - 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0, - 0x00000768, 0x00000758, 0x00000720, 0x00000708, - 0x00000718, 0x00000778, 0x00000728, 0x00000748, - 0x00000700, 0x00000750, 0x00000770, 0x00000738, - 0x00000730, 0x00000740, 0x00000710, 0x00000760, - 0x00000368, 0x00000358, 0x00000320, 0x00000308, - 0x00000318, 0x00000378, 0x00000328, 0x00000348, - 0x00000300, 0x00000350, 0x00000370, 0x00000338, - 0x00000330, 0x00000340, 0x00000310, 0x00000360, - 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588, - 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8, - 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8, - 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0, - 0x00000468, 0x00000458, 0x00000420, 0x00000408, - 0x00000418, 0x00000478, 0x00000428, 0x00000448, - 0x00000400, 0x00000450, 0x00000470, 0x00000438, - 0x00000430, 0x00000440, 0x00000410, 0x00000460, - 0x00000668, 0x00000658, 0x00000620, 0x00000608, - 0x00000618, 0x00000678, 0x00000628, 0x00000648, - 0x00000600, 0x00000650, 0x00000670, 0x00000638, - 0x00000630, 0x00000640, 0x00000610, 0x00000660, - } -}; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_tables[4][256]; - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules 
(c_rules[il_pos].cmds, w0, w1, pw_len); - - u32 w14 = out_len * 8; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = w1[3]; - - u32x state[16]; - - state[ 0] = 0; - state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, 
data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - data[2] = state[10]; - data[3] = state[11]; - data[4] = state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - 
u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_tables[4][256]; - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32 w14 = out_len * 8; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = w1[3]; - - u32x state[16]; - - state[ 0] = 0; - state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - 
state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - data[2] = state[10]; - data[3] = state[11]; - data[4] = state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 
(state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06900_a1.cu b/nv/m06900_a1.cu deleted file mode 100644 index 339a0a1..0000000 --- a/nv/m06900_a1.cu +++ /dev/null @@ -1,1321 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 c_tables[4][256] = -{ - { - 0x00072000, 0x00075000, 0x00074800, 0x00071000, - 0x00076800, 0x00074000, 0x00070000, 0x00077000, - 0x00073000, 0x00075800, 0x00070800, 0x00076000, - 0x00073800, 0x00077800, 0x00072800, 0x00071800, - 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000, - 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000, - 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000, - 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800, - 0x00022000, 0x00025000, 0x00024800, 0x00021000, - 0x00026800, 0x00024000, 0x00020000, 0x00027000, - 0x00023000, 0x00025800, 0x00020800, 0x00026000, - 0x00023800, 0x00027800, 0x00022800, 0x00021800, - 0x00062000, 0x00065000, 0x00064800, 0x00061000, - 0x00066800, 0x00064000, 0x00060000, 0x00067000, - 0x00063000, 0x00065800, 0x00060800, 0x00066000, - 0x00063800, 0x00067800, 0x00062800, 0x00061800, - 0x00032000, 0x00035000, 0x00034800, 0x00031000, - 0x00036800, 0x00034000, 0x00030000, 0x00037000, - 0x00033000, 0x00035800, 0x00030800, 0x00036000, - 0x00033800, 0x00037800, 0x00032800, 0x00031800, - 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000, - 
0x0006e800, 0x0006c000, 0x00068000, 0x0006f000, - 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000, - 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800, - 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000, - 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000, - 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000, - 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800, - 0x00052000, 0x00055000, 0x00054800, 0x00051000, - 0x00056800, 0x00054000, 0x00050000, 0x00057000, - 0x00053000, 0x00055800, 0x00050800, 0x00056000, - 0x00053800, 0x00057800, 0x00052800, 0x00051800, - 0x00012000, 0x00015000, 0x00014800, 0x00011000, - 0x00016800, 0x00014000, 0x00010000, 0x00017000, - 0x00013000, 0x00015800, 0x00010800, 0x00016000, - 0x00013800, 0x00017800, 0x00012800, 0x00011800, - 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000, - 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000, - 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000, - 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800, - 0x00042000, 0x00045000, 0x00044800, 0x00041000, - 0x00046800, 0x00044000, 0x00040000, 0x00047000, - 0x00043000, 0x00045800, 0x00040800, 0x00046000, - 0x00043800, 0x00047800, 0x00042800, 0x00041800, - 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000, - 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000, - 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000, - 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800, - 0x00002000, 0x00005000, 0x00004800, 0x00001000, - 0x00006800, 0x00004000, 0x00000000, 0x00007000, - 0x00003000, 0x00005800, 0x00000800, 0x00006000, - 0x00003800, 0x00007800, 0x00002800, 0x00001800, - 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000, - 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000, - 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000, - 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800, - 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000, - 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000, - 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000, - 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800, - 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000, - 
0x0004e800, 0x0004c000, 0x00048000, 0x0004f000, - 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000, - 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800, - }, - { - 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000, - 0x03d00000, 0x03980000, 0x03a00000, 0x03900000, - 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000, - 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000, - 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000, - 0x06d00000, 0x06980000, 0x06a00000, 0x06900000, - 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000, - 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000, - 0x05280000, 0x05400000, 0x05080000, 0x05680000, - 0x05500000, 0x05180000, 0x05200000, 0x05100000, - 0x05700000, 0x05780000, 0x05600000, 0x05380000, - 0x05300000, 0x05000000, 0x05480000, 0x05580000, - 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000, - 0x00d00000, 0x00980000, 0x00a00000, 0x00900000, - 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000, - 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000, - 0x00280000, 0x00400000, 0x00080000, 0x00680000, - 0x00500000, 0x00180000, 0x00200000, 0x00100000, - 0x00700000, 0x00780000, 0x00600000, 0x00380000, - 0x00300000, 0x00000000, 0x00480000, 0x00580000, - 0x04280000, 0x04400000, 0x04080000, 0x04680000, - 0x04500000, 0x04180000, 0x04200000, 0x04100000, - 0x04700000, 0x04780000, 0x04600000, 0x04380000, - 0x04300000, 0x04000000, 0x04480000, 0x04580000, - 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000, - 0x04d00000, 0x04980000, 0x04a00000, 0x04900000, - 0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000, - 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000, - 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000, - 0x07d00000, 0x07980000, 0x07a00000, 0x07900000, - 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000, - 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000, - 0x07280000, 0x07400000, 0x07080000, 0x07680000, - 0x07500000, 0x07180000, 0x07200000, 0x07100000, - 0x07700000, 0x07780000, 0x07600000, 0x07380000, - 0x07300000, 0x07000000, 0x07480000, 0x07580000, - 0x02280000, 0x02400000, 0x02080000, 
0x02680000, - 0x02500000, 0x02180000, 0x02200000, 0x02100000, - 0x02700000, 0x02780000, 0x02600000, 0x02380000, - 0x02300000, 0x02000000, 0x02480000, 0x02580000, - 0x03280000, 0x03400000, 0x03080000, 0x03680000, - 0x03500000, 0x03180000, 0x03200000, 0x03100000, - 0x03700000, 0x03780000, 0x03600000, 0x03380000, - 0x03300000, 0x03000000, 0x03480000, 0x03580000, - 0x06280000, 0x06400000, 0x06080000, 0x06680000, - 0x06500000, 0x06180000, 0x06200000, 0x06100000, - 0x06700000, 0x06780000, 0x06600000, 0x06380000, - 0x06300000, 0x06000000, 0x06480000, 0x06580000, - 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000, - 0x05d00000, 0x05980000, 0x05a00000, 0x05900000, - 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000, - 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000, - 0x01280000, 0x01400000, 0x01080000, 0x01680000, - 0x01500000, 0x01180000, 0x01200000, 0x01100000, - 0x01700000, 0x01780000, 0x01600000, 0x01380000, - 0x01300000, 0x01000000, 0x01480000, 0x01580000, - 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000, - 0x02d00000, 0x02980000, 0x02a00000, 0x02900000, - 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000, - 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000, - 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000, - 0x01d00000, 0x01980000, 0x01a00000, 0x01900000, - 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000, - 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000, - }, - { - 0x30000002, 0x60000002, 0x38000002, 0x08000002, - 0x28000002, 0x78000002, 0x68000002, 0x40000002, - 0x20000002, 0x50000002, 0x48000002, 0x70000002, - 0x00000002, 0x18000002, 0x58000002, 0x10000002, - 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005, - 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005, - 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005, - 0x80000005, 0x98000005, 0xd8000005, 0x90000005, - 0x30000005, 0x60000005, 0x38000005, 0x08000005, - 0x28000005, 0x78000005, 0x68000005, 0x40000005, - 0x20000005, 0x50000005, 0x48000005, 0x70000005, - 0x00000005, 0x18000005, 0x58000005, 0x10000005, - 0x30000000, 0x60000000, 
0x38000000, 0x08000000, - 0x28000000, 0x78000000, 0x68000000, 0x40000000, - 0x20000000, 0x50000000, 0x48000000, 0x70000000, - 0x00000000, 0x18000000, 0x58000000, 0x10000000, - 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003, - 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003, - 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003, - 0x80000003, 0x98000003, 0xd8000003, 0x90000003, - 0x30000001, 0x60000001, 0x38000001, 0x08000001, - 0x28000001, 0x78000001, 0x68000001, 0x40000001, - 0x20000001, 0x50000001, 0x48000001, 0x70000001, - 0x00000001, 0x18000001, 0x58000001, 0x10000001, - 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000, - 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000, - 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000, - 0x80000000, 0x98000000, 0xd8000000, 0x90000000, - 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006, - 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006, - 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006, - 0x80000006, 0x98000006, 0xd8000006, 0x90000006, - 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001, - 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001, - 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001, - 0x80000001, 0x98000001, 0xd8000001, 0x90000001, - 0x30000003, 0x60000003, 0x38000003, 0x08000003, - 0x28000003, 0x78000003, 0x68000003, 0x40000003, - 0x20000003, 0x50000003, 0x48000003, 0x70000003, - 0x00000003, 0x18000003, 0x58000003, 0x10000003, - 0x30000004, 0x60000004, 0x38000004, 0x08000004, - 0x28000004, 0x78000004, 0x68000004, 0x40000004, - 0x20000004, 0x50000004, 0x48000004, 0x70000004, - 0x00000004, 0x18000004, 0x58000004, 0x10000004, - 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002, - 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002, - 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002, - 0x80000002, 0x98000002, 0xd8000002, 0x90000002, - 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004, - 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004, - 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004, - 0x80000004, 0x98000004, 0xd8000004, 0x90000004, - 0x30000006, 0x60000006, 
0x38000006, 0x08000006, - 0x28000006, 0x78000006, 0x68000006, 0x40000006, - 0x20000006, 0x50000006, 0x48000006, 0x70000006, - 0x00000006, 0x18000006, 0x58000006, 0x10000006, - 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007, - 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007, - 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007, - 0x80000007, 0x98000007, 0xd8000007, 0x90000007, - 0x30000007, 0x60000007, 0x38000007, 0x08000007, - 0x28000007, 0x78000007, 0x68000007, 0x40000007, - 0x20000007, 0x50000007, 0x48000007, 0x70000007, - 0x00000007, 0x18000007, 0x58000007, 0x10000007, - }, - { - 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088, - 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8, - 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8, - 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0, - 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788, - 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8, - 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8, - 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0, - 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688, - 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8, - 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8, - 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0, - 0x00000068, 0x00000058, 0x00000020, 0x00000008, - 0x00000018, 0x00000078, 0x00000028, 0x00000048, - 0x00000000, 0x00000050, 0x00000070, 0x00000038, - 0x00000030, 0x00000040, 0x00000010, 0x00000060, - 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288, - 0x00000298, 0x000002f8, 0x000002a8, 0x000002c8, - 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8, - 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0, - 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388, - 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8, - 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8, - 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0, - 0x00000568, 0x00000558, 0x00000520, 0x00000508, - 0x00000518, 0x00000578, 0x00000528, 0x00000548, - 0x00000500, 0x00000550, 0x00000570, 0x00000538, - 0x00000530, 0x00000540, 0x00000510, 0x00000560, - 0x00000268, 
0x00000258, 0x00000220, 0x00000208, - 0x00000218, 0x00000278, 0x00000228, 0x00000248, - 0x00000200, 0x00000250, 0x00000270, 0x00000238, - 0x00000230, 0x00000240, 0x00000210, 0x00000260, - 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488, - 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8, - 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8, - 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0, - 0x00000168, 0x00000158, 0x00000120, 0x00000108, - 0x00000118, 0x00000178, 0x00000128, 0x00000148, - 0x00000100, 0x00000150, 0x00000170, 0x00000138, - 0x00000130, 0x00000140, 0x00000110, 0x00000160, - 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188, - 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8, - 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8, - 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0, - 0x00000768, 0x00000758, 0x00000720, 0x00000708, - 0x00000718, 0x00000778, 0x00000728, 0x00000748, - 0x00000700, 0x00000750, 0x00000770, 0x00000738, - 0x00000730, 0x00000740, 0x00000710, 0x00000760, - 0x00000368, 0x00000358, 0x00000320, 0x00000308, - 0x00000318, 0x00000378, 0x00000328, 0x00000348, - 0x00000300, 0x00000350, 0x00000370, 0x00000338, - 0x00000330, 0x00000340, 0x00000310, 0x00000360, - 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588, - 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8, - 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8, - 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0, - 0x00000468, 0x00000458, 0x00000420, 0x00000408, - 0x00000418, 0x00000478, 0x00000428, 0x00000448, - 0x00000400, 0x00000450, 0x00000470, 0x00000438, - 0x00000430, 0x00000440, 0x00000410, 0x00000460, - 0x00000668, 0x00000658, 0x00000620, 0x00000608, - 0x00000618, 0x00000678, 0x00000628, 0x00000648, - 0x00000600, 0x00000650, 0x00000670, 0x00000638, - 0x00000630, 0x00000640, 0x00000610, 0x00000660, - } -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -#define round(k1,k2,tbl) \ -{ \ - u32x t; \ 
- t = (k1) + r; \ - l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ - t = (k2) + l; \ - r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ -} - -#define R(k,h,s,i,t) \ -{ \ - u32x r; \ - u32x l; \ - r = h[i + 0]; \ - l = h[i + 1]; \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[7], k[6], t); \ - round (k[5], k[4], t); \ - round (k[3], k[2], t); \ - round (k[1], k[0], t); \ - s[i + 0] = l; \ - s[i + 1] = r; \ -} - -#define X(w,u,v) \ - w[0] = u[0] ^ v[0]; \ - w[1] = u[1] ^ v[1]; \ - w[2] = u[2] ^ v[2]; \ - w[3] = u[3] ^ v[3]; \ - w[4] = u[4] ^ v[4]; \ - w[5] = u[5] ^ v[5]; \ - w[6] = u[6] ^ v[6]; \ - w[7] = u[7] ^ v[7]; - -#define P(k,w) \ - k[0] = ((w[0] & 0x000000ff) << 0) \ - | ((w[2] & 0x000000ff) << 8) \ - | ((w[4] & 0x000000ff) << 16) \ - | ((w[6] & 0x000000ff) << 24); \ - k[1] = ((w[0] & 0x0000ff00) >> 8) \ - | ((w[2] & 0x0000ff00) >> 0) \ - | ((w[4] & 0x0000ff00) << 8) \ - | ((w[6] & 0x0000ff00) << 16); \ - k[2] = ((w[0] & 0x00ff0000) >> 16) \ - | ((w[2] & 0x00ff0000) >> 8) \ - | ((w[4] & 0x00ff0000) << 0) \ - | ((w[6] & 0x00ff0000) << 8); \ - k[3] = ((w[0] & 0xff000000) >> 24) \ - | ((w[2] & 0xff000000) >> 16) \ - | ((w[4] & 0xff000000) >> 8) \ - | ((w[6] & 0xff000000) >> 0); \ - k[4] = ((w[1] & 0x000000ff) << 0) \ - | ((w[3] & 0x000000ff) << 8) \ - | ((w[5] & 0x000000ff) << 16) \ - | ((w[7] & 0x000000ff) << 24); \ - k[5] = ((w[1] & 0x0000ff00) >> 8) \ - | ((w[3] & 0x0000ff00) >> 0) \ - | ((w[5] & 0x0000ff00) << 8) \ - | ((w[7] & 0x0000ff00) << 16); \ - k[6] = ((w[1] & 0x00ff0000) 
>> 16) \ - | ((w[3] & 0x00ff0000) >> 8) \ - | ((w[5] & 0x00ff0000) << 0) \ - | ((w[7] & 0x00ff0000) << 8); \ - k[7] = ((w[1] & 0xff000000) >> 24) \ - | ((w[3] & 0xff000000) >> 16) \ - | ((w[5] & 0xff000000) >> 8) \ - | ((w[7] & 0xff000000) >> 0); - -#define A(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0] ^ x[2]; \ - r = x[1] ^ x[3]; \ - x[0] = x[2]; \ - x[1] = x[3]; \ - x[2] = x[4]; \ - x[3] = x[5]; \ - x[4] = x[6]; \ - x[5] = x[7]; \ - x[6] = l; \ - x[7] = r; \ -} - -#define AA(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0]; \ - r = x[2]; \ - x[0] = x[4]; \ - x[2] = x[6]; \ - x[4] = l ^ r; \ - x[6] = x[0] ^ r; \ - l = x[1]; \ - r = x[3]; \ - x[1] = x[5]; \ - x[3] = x[7]; \ - x[5] = l ^ r; \ - x[7] = x[1] ^ r; \ -} - -#define C(x) \ - x[0] ^= 0xff00ff00; \ - x[1] ^= 0xff00ff00; \ - x[2] ^= 0x00ff00ff; \ - x[3] ^= 0x00ff00ff; \ - x[4] ^= 0x00ffff00; \ - x[5] ^= 0xff0000ff; \ - x[6] ^= 0x000000ff; \ - x[7] ^= 0xff00ffff; - -#define SHIFT12(u,m,s) \ - u[0] = m[0] ^ s[6]; \ - u[1] = m[1] ^ s[7]; \ - u[2] = m[2] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0x0000ffff) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] >> 16); \ - u[3] = m[3] ^ (s[0] & 0x0000ffff) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[4] = m[4] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[1] & 0xffff0000) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[5] = m[5] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0xffff0000) \ - ^ (s[1] & 0x0000ffff) \ - ^ s[2] \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 
16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[6] = m[6] ^ s[0] \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[3] \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] << 16); \ - u[7] = m[7] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[4] \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); - -#define SHIFT16(h,v,u) \ - v[0] = h[0] ^ (u[1] << 16) \ - ^ (u[0] >> 16); \ - v[1] = h[1] ^ (u[2] << 16) \ - ^ (u[1] >> 16); \ - v[2] = h[2] ^ (u[3] << 16) \ - ^ (u[2] >> 16); \ - v[3] = h[3] ^ (u[4] << 16) \ - ^ (u[3] >> 16); \ - v[4] = h[4] ^ (u[5] << 16) \ - ^ (u[4] >> 16); \ - v[5] = h[5] ^ (u[6] << 16) \ - ^ (u[5] >> 16); \ - v[6] = h[6] ^ (u[7] << 16) \ - ^ (u[6] >> 16); \ - v[7] = h[7] ^ (u[0] & 0xffff0000) \ - ^ (u[0] << 16) \ - ^ (u[7] >> 16) \ - ^ (u[1] & 0xffff0000) \ - ^ (u[1] << 16) \ - ^ (u[6] << 16) \ - ^ (u[7] & 0xffff0000); - -#define SHIFT61(h,v) \ - h[0] = (v[0] & 0xffff0000) \ - ^ (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0x0000ffff); \ - h[1] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0x0000ffff) \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ v[6] \ - ^ (v[7] & 0xffff0000) \ - ^ (v[7] >> 16); \ - h[2] = (v[0] & 0x0000ffff) \ - ^ (v[0] << 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ 
(v[5] >> 16) \ - ^ v[6] \ - ^ (v[6] >> 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16); \ - h[3] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] >> 16); \ - h[4] = (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ v[1] \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16); \ - h[5] = (v[0] << 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ v[2] \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0xffff0000); \ - h[6] = v[0] \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ v[3] \ - ^ (v[3] << 16) \ - ^ v[4] \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ v[7]; \ - h[7] = v[0] \ - ^ (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ v[4] \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16) \ - ^ v[7]; - -#define PASS0(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 0, t); \ - A (u); \ - AA (v); \ -} - -#define PASS2(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 2, t); \ - A (u); \ - C (u); \ - AA (v); \ -} - -#define PASS4(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 4, t); \ - A (u); \ - AA (v); \ -} - -#define PASS6(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - 
X (w, u, v); \ - P (k, w); \ - R (k, h, s, 6, t); \ -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox - */ - - __shared__ u32 s_tables[4][256]; - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * 
loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - const u32 w14 = pw_len * 8; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = w1[3]; - - u32x state[16]; - - state[ 0] = 0; - state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - 
state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - data[2] = state[10]; - data[3] = state[11]; - data[4] = state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - 
data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * sbox - */ - - __shared__ u32 s_tables[4][256]; - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = 
c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = pw_len * 8; - w3[3] = 0; - - const u32 w14 = pw_len * 8; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = w1[3]; - - u32x state[16]; - - state[ 0] = 0; - 
state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - data[2] = state[10]; - data[3] = state[11]; - data[4] 
= state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m06900_a3.cu b/nv/m06900_a3.cu deleted file mode 100644 index 8d237fc..0000000 --- a/nv/m06900_a3.cu +++ /dev/null @@ -1,1331 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 c_tables[4][256] = -{ - { - 0x00072000, 0x00075000, 0x00074800, 0x00071000, - 0x00076800, 0x00074000, 0x00070000, 0x00077000, - 0x00073000, 0x00075800, 0x00070800, 0x00076000, - 0x00073800, 0x00077800, 0x00072800, 0x00071800, - 0x0005a000, 0x0005d000, 0x0005c800, 0x00059000, - 0x0005e800, 0x0005c000, 0x00058000, 0x0005f000, - 0x0005b000, 0x0005d800, 0x00058800, 0x0005e000, - 0x0005b800, 0x0005f800, 0x0005a800, 0x00059800, - 0x00022000, 0x00025000, 0x00024800, 0x00021000, - 0x00026800, 0x00024000, 0x00020000, 0x00027000, - 0x00023000, 
0x00025800, 0x00020800, 0x00026000, - 0x00023800, 0x00027800, 0x00022800, 0x00021800, - 0x00062000, 0x00065000, 0x00064800, 0x00061000, - 0x00066800, 0x00064000, 0x00060000, 0x00067000, - 0x00063000, 0x00065800, 0x00060800, 0x00066000, - 0x00063800, 0x00067800, 0x00062800, 0x00061800, - 0x00032000, 0x00035000, 0x00034800, 0x00031000, - 0x00036800, 0x00034000, 0x00030000, 0x00037000, - 0x00033000, 0x00035800, 0x00030800, 0x00036000, - 0x00033800, 0x00037800, 0x00032800, 0x00031800, - 0x0006a000, 0x0006d000, 0x0006c800, 0x00069000, - 0x0006e800, 0x0006c000, 0x00068000, 0x0006f000, - 0x0006b000, 0x0006d800, 0x00068800, 0x0006e000, - 0x0006b800, 0x0006f800, 0x0006a800, 0x00069800, - 0x0007a000, 0x0007d000, 0x0007c800, 0x00079000, - 0x0007e800, 0x0007c000, 0x00078000, 0x0007f000, - 0x0007b000, 0x0007d800, 0x00078800, 0x0007e000, - 0x0007b800, 0x0007f800, 0x0007a800, 0x00079800, - 0x00052000, 0x00055000, 0x00054800, 0x00051000, - 0x00056800, 0x00054000, 0x00050000, 0x00057000, - 0x00053000, 0x00055800, 0x00050800, 0x00056000, - 0x00053800, 0x00057800, 0x00052800, 0x00051800, - 0x00012000, 0x00015000, 0x00014800, 0x00011000, - 0x00016800, 0x00014000, 0x00010000, 0x00017000, - 0x00013000, 0x00015800, 0x00010800, 0x00016000, - 0x00013800, 0x00017800, 0x00012800, 0x00011800, - 0x0001a000, 0x0001d000, 0x0001c800, 0x00019000, - 0x0001e800, 0x0001c000, 0x00018000, 0x0001f000, - 0x0001b000, 0x0001d800, 0x00018800, 0x0001e000, - 0x0001b800, 0x0001f800, 0x0001a800, 0x00019800, - 0x00042000, 0x00045000, 0x00044800, 0x00041000, - 0x00046800, 0x00044000, 0x00040000, 0x00047000, - 0x00043000, 0x00045800, 0x00040800, 0x00046000, - 0x00043800, 0x00047800, 0x00042800, 0x00041800, - 0x0000a000, 0x0000d000, 0x0000c800, 0x00009000, - 0x0000e800, 0x0000c000, 0x00008000, 0x0000f000, - 0x0000b000, 0x0000d800, 0x00008800, 0x0000e000, - 0x0000b800, 0x0000f800, 0x0000a800, 0x00009800, - 0x00002000, 0x00005000, 0x00004800, 0x00001000, - 0x00006800, 0x00004000, 0x00000000, 0x00007000, - 0x00003000, 
0x00005800, 0x00000800, 0x00006000, - 0x00003800, 0x00007800, 0x00002800, 0x00001800, - 0x0003a000, 0x0003d000, 0x0003c800, 0x00039000, - 0x0003e800, 0x0003c000, 0x00038000, 0x0003f000, - 0x0003b000, 0x0003d800, 0x00038800, 0x0003e000, - 0x0003b800, 0x0003f800, 0x0003a800, 0x00039800, - 0x0002a000, 0x0002d000, 0x0002c800, 0x00029000, - 0x0002e800, 0x0002c000, 0x00028000, 0x0002f000, - 0x0002b000, 0x0002d800, 0x00028800, 0x0002e000, - 0x0002b800, 0x0002f800, 0x0002a800, 0x00029800, - 0x0004a000, 0x0004d000, 0x0004c800, 0x00049000, - 0x0004e800, 0x0004c000, 0x00048000, 0x0004f000, - 0x0004b000, 0x0004d800, 0x00048800, 0x0004e000, - 0x0004b800, 0x0004f800, 0x0004a800, 0x00049800, - }, - { - 0x03a80000, 0x03c00000, 0x03880000, 0x03e80000, - 0x03d00000, 0x03980000, 0x03a00000, 0x03900000, - 0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000, - 0x03b00000, 0x03800000, 0x03c80000, 0x03d80000, - 0x06a80000, 0x06c00000, 0x06880000, 0x06e80000, - 0x06d00000, 0x06980000, 0x06a00000, 0x06900000, - 0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000, - 0x06b00000, 0x06800000, 0x06c80000, 0x06d80000, - 0x05280000, 0x05400000, 0x05080000, 0x05680000, - 0x05500000, 0x05180000, 0x05200000, 0x05100000, - 0x05700000, 0x05780000, 0x05600000, 0x05380000, - 0x05300000, 0x05000000, 0x05480000, 0x05580000, - 0x00a80000, 0x00c00000, 0x00880000, 0x00e80000, - 0x00d00000, 0x00980000, 0x00a00000, 0x00900000, - 0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000, - 0x00b00000, 0x00800000, 0x00c80000, 0x00d80000, - 0x00280000, 0x00400000, 0x00080000, 0x00680000, - 0x00500000, 0x00180000, 0x00200000, 0x00100000, - 0x00700000, 0x00780000, 0x00600000, 0x00380000, - 0x00300000, 0x00000000, 0x00480000, 0x00580000, - 0x04280000, 0x04400000, 0x04080000, 0x04680000, - 0x04500000, 0x04180000, 0x04200000, 0x04100000, - 0x04700000, 0x04780000, 0x04600000, 0x04380000, - 0x04300000, 0x04000000, 0x04480000, 0x04580000, - 0x04a80000, 0x04c00000, 0x04880000, 0x04e80000, - 0x04d00000, 0x04980000, 0x04a00000, 0x04900000, - 
0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000, - 0x04b00000, 0x04800000, 0x04c80000, 0x04d80000, - 0x07a80000, 0x07c00000, 0x07880000, 0x07e80000, - 0x07d00000, 0x07980000, 0x07a00000, 0x07900000, - 0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000, - 0x07b00000, 0x07800000, 0x07c80000, 0x07d80000, - 0x07280000, 0x07400000, 0x07080000, 0x07680000, - 0x07500000, 0x07180000, 0x07200000, 0x07100000, - 0x07700000, 0x07780000, 0x07600000, 0x07380000, - 0x07300000, 0x07000000, 0x07480000, 0x07580000, - 0x02280000, 0x02400000, 0x02080000, 0x02680000, - 0x02500000, 0x02180000, 0x02200000, 0x02100000, - 0x02700000, 0x02780000, 0x02600000, 0x02380000, - 0x02300000, 0x02000000, 0x02480000, 0x02580000, - 0x03280000, 0x03400000, 0x03080000, 0x03680000, - 0x03500000, 0x03180000, 0x03200000, 0x03100000, - 0x03700000, 0x03780000, 0x03600000, 0x03380000, - 0x03300000, 0x03000000, 0x03480000, 0x03580000, - 0x06280000, 0x06400000, 0x06080000, 0x06680000, - 0x06500000, 0x06180000, 0x06200000, 0x06100000, - 0x06700000, 0x06780000, 0x06600000, 0x06380000, - 0x06300000, 0x06000000, 0x06480000, 0x06580000, - 0x05a80000, 0x05c00000, 0x05880000, 0x05e80000, - 0x05d00000, 0x05980000, 0x05a00000, 0x05900000, - 0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000, - 0x05b00000, 0x05800000, 0x05c80000, 0x05d80000, - 0x01280000, 0x01400000, 0x01080000, 0x01680000, - 0x01500000, 0x01180000, 0x01200000, 0x01100000, - 0x01700000, 0x01780000, 0x01600000, 0x01380000, - 0x01300000, 0x01000000, 0x01480000, 0x01580000, - 0x02a80000, 0x02c00000, 0x02880000, 0x02e80000, - 0x02d00000, 0x02980000, 0x02a00000, 0x02900000, - 0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000, - 0x02b00000, 0x02800000, 0x02c80000, 0x02d80000, - 0x01a80000, 0x01c00000, 0x01880000, 0x01e80000, - 0x01d00000, 0x01980000, 0x01a00000, 0x01900000, - 0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000, - 0x01b00000, 0x01800000, 0x01c80000, 0x01d80000, - }, - { - 0x30000002, 0x60000002, 0x38000002, 0x08000002, - 0x28000002, 0x78000002, 0x68000002, 
0x40000002, - 0x20000002, 0x50000002, 0x48000002, 0x70000002, - 0x00000002, 0x18000002, 0x58000002, 0x10000002, - 0xb0000005, 0xe0000005, 0xb8000005, 0x88000005, - 0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005, - 0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005, - 0x80000005, 0x98000005, 0xd8000005, 0x90000005, - 0x30000005, 0x60000005, 0x38000005, 0x08000005, - 0x28000005, 0x78000005, 0x68000005, 0x40000005, - 0x20000005, 0x50000005, 0x48000005, 0x70000005, - 0x00000005, 0x18000005, 0x58000005, 0x10000005, - 0x30000000, 0x60000000, 0x38000000, 0x08000000, - 0x28000000, 0x78000000, 0x68000000, 0x40000000, - 0x20000000, 0x50000000, 0x48000000, 0x70000000, - 0x00000000, 0x18000000, 0x58000000, 0x10000000, - 0xb0000003, 0xe0000003, 0xb8000003, 0x88000003, - 0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003, - 0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003, - 0x80000003, 0x98000003, 0xd8000003, 0x90000003, - 0x30000001, 0x60000001, 0x38000001, 0x08000001, - 0x28000001, 0x78000001, 0x68000001, 0x40000001, - 0x20000001, 0x50000001, 0x48000001, 0x70000001, - 0x00000001, 0x18000001, 0x58000001, 0x10000001, - 0xb0000000, 0xe0000000, 0xb8000000, 0x88000000, - 0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000, - 0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000, - 0x80000000, 0x98000000, 0xd8000000, 0x90000000, - 0xb0000006, 0xe0000006, 0xb8000006, 0x88000006, - 0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006, - 0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006, - 0x80000006, 0x98000006, 0xd8000006, 0x90000006, - 0xb0000001, 0xe0000001, 0xb8000001, 0x88000001, - 0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001, - 0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001, - 0x80000001, 0x98000001, 0xd8000001, 0x90000001, - 0x30000003, 0x60000003, 0x38000003, 0x08000003, - 0x28000003, 0x78000003, 0x68000003, 0x40000003, - 0x20000003, 0x50000003, 0x48000003, 0x70000003, - 0x00000003, 0x18000003, 0x58000003, 0x10000003, - 0x30000004, 0x60000004, 0x38000004, 0x08000004, - 0x28000004, 0x78000004, 0x68000004, 
0x40000004, - 0x20000004, 0x50000004, 0x48000004, 0x70000004, - 0x00000004, 0x18000004, 0x58000004, 0x10000004, - 0xb0000002, 0xe0000002, 0xb8000002, 0x88000002, - 0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002, - 0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002, - 0x80000002, 0x98000002, 0xd8000002, 0x90000002, - 0xb0000004, 0xe0000004, 0xb8000004, 0x88000004, - 0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004, - 0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004, - 0x80000004, 0x98000004, 0xd8000004, 0x90000004, - 0x30000006, 0x60000006, 0x38000006, 0x08000006, - 0x28000006, 0x78000006, 0x68000006, 0x40000006, - 0x20000006, 0x50000006, 0x48000006, 0x70000006, - 0x00000006, 0x18000006, 0x58000006, 0x10000006, - 0xb0000007, 0xe0000007, 0xb8000007, 0x88000007, - 0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007, - 0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007, - 0x80000007, 0x98000007, 0xd8000007, 0x90000007, - 0x30000007, 0x60000007, 0x38000007, 0x08000007, - 0x28000007, 0x78000007, 0x68000007, 0x40000007, - 0x20000007, 0x50000007, 0x48000007, 0x70000007, - 0x00000007, 0x18000007, 0x58000007, 0x10000007, - }, - { - 0x000000e8, 0x000000d8, 0x000000a0, 0x00000088, - 0x00000098, 0x000000f8, 0x000000a8, 0x000000c8, - 0x00000080, 0x000000d0, 0x000000f0, 0x000000b8, - 0x000000b0, 0x000000c0, 0x00000090, 0x000000e0, - 0x000007e8, 0x000007d8, 0x000007a0, 0x00000788, - 0x00000798, 0x000007f8, 0x000007a8, 0x000007c8, - 0x00000780, 0x000007d0, 0x000007f0, 0x000007b8, - 0x000007b0, 0x000007c0, 0x00000790, 0x000007e0, - 0x000006e8, 0x000006d8, 0x000006a0, 0x00000688, - 0x00000698, 0x000006f8, 0x000006a8, 0x000006c8, - 0x00000680, 0x000006d0, 0x000006f0, 0x000006b8, - 0x000006b0, 0x000006c0, 0x00000690, 0x000006e0, - 0x00000068, 0x00000058, 0x00000020, 0x00000008, - 0x00000018, 0x00000078, 0x00000028, 0x00000048, - 0x00000000, 0x00000050, 0x00000070, 0x00000038, - 0x00000030, 0x00000040, 0x00000010, 0x00000060, - 0x000002e8, 0x000002d8, 0x000002a0, 0x00000288, - 0x00000298, 0x000002f8, 
0x000002a8, 0x000002c8, - 0x00000280, 0x000002d0, 0x000002f0, 0x000002b8, - 0x000002b0, 0x000002c0, 0x00000290, 0x000002e0, - 0x000003e8, 0x000003d8, 0x000003a0, 0x00000388, - 0x00000398, 0x000003f8, 0x000003a8, 0x000003c8, - 0x00000380, 0x000003d0, 0x000003f0, 0x000003b8, - 0x000003b0, 0x000003c0, 0x00000390, 0x000003e0, - 0x00000568, 0x00000558, 0x00000520, 0x00000508, - 0x00000518, 0x00000578, 0x00000528, 0x00000548, - 0x00000500, 0x00000550, 0x00000570, 0x00000538, - 0x00000530, 0x00000540, 0x00000510, 0x00000560, - 0x00000268, 0x00000258, 0x00000220, 0x00000208, - 0x00000218, 0x00000278, 0x00000228, 0x00000248, - 0x00000200, 0x00000250, 0x00000270, 0x00000238, - 0x00000230, 0x00000240, 0x00000210, 0x00000260, - 0x000004e8, 0x000004d8, 0x000004a0, 0x00000488, - 0x00000498, 0x000004f8, 0x000004a8, 0x000004c8, - 0x00000480, 0x000004d0, 0x000004f0, 0x000004b8, - 0x000004b0, 0x000004c0, 0x00000490, 0x000004e0, - 0x00000168, 0x00000158, 0x00000120, 0x00000108, - 0x00000118, 0x00000178, 0x00000128, 0x00000148, - 0x00000100, 0x00000150, 0x00000170, 0x00000138, - 0x00000130, 0x00000140, 0x00000110, 0x00000160, - 0x000001e8, 0x000001d8, 0x000001a0, 0x00000188, - 0x00000198, 0x000001f8, 0x000001a8, 0x000001c8, - 0x00000180, 0x000001d0, 0x000001f0, 0x000001b8, - 0x000001b0, 0x000001c0, 0x00000190, 0x000001e0, - 0x00000768, 0x00000758, 0x00000720, 0x00000708, - 0x00000718, 0x00000778, 0x00000728, 0x00000748, - 0x00000700, 0x00000750, 0x00000770, 0x00000738, - 0x00000730, 0x00000740, 0x00000710, 0x00000760, - 0x00000368, 0x00000358, 0x00000320, 0x00000308, - 0x00000318, 0x00000378, 0x00000328, 0x00000348, - 0x00000300, 0x00000350, 0x00000370, 0x00000338, - 0x00000330, 0x00000340, 0x00000310, 0x00000360, - 0x000005e8, 0x000005d8, 0x000005a0, 0x00000588, - 0x00000598, 0x000005f8, 0x000005a8, 0x000005c8, - 0x00000580, 0x000005d0, 0x000005f0, 0x000005b8, - 0x000005b0, 0x000005c0, 0x00000590, 0x000005e0, - 0x00000468, 0x00000458, 0x00000420, 0x00000408, - 0x00000418, 0x00000478, 
0x00000428, 0x00000448, - 0x00000400, 0x00000450, 0x00000470, 0x00000438, - 0x00000430, 0x00000440, 0x00000410, 0x00000460, - 0x00000668, 0x00000658, 0x00000620, 0x00000608, - 0x00000618, 0x00000678, 0x00000628, 0x00000648, - 0x00000600, 0x00000650, 0x00000670, 0x00000638, - 0x00000630, 0x00000640, 0x00000610, 0x00000660, - } -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) u32x ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) u32x ((S)[(n)][(i).x], (S)[(n)][(i).y]) -#endif - -#define round(k1,k2,tbl) \ -{ \ - u32x t; \ - t = (k1) + r; \ - l ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ - t = (k2) + l; \ - r ^= BOX ((t >> 0) & 0xff, 0, tbl) ^ \ - BOX ((t >> 8) & 0xff, 1, tbl) ^ \ - BOX ((t >> 16) & 0xff, 2, tbl) ^ \ - BOX ((t >> 24) & 0xff, 3, tbl); \ -} - -#define R(k,h,s,i,t) \ -{ \ - u32x r; \ - u32x l; \ - r = h[i + 0]; \ - l = h[i + 1]; \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[0], k[1], t); \ - round (k[2], k[3], t); \ - round (k[4], k[5], t); \ - round (k[6], k[7], t); \ - round (k[7], k[6], t); \ - round (k[5], k[4], t); \ - round (k[3], k[2], t); \ - round (k[1], k[0], t); \ - s[i + 0] = l; \ - s[i + 1] = r; \ -} - -#define X(w,u,v) \ - w[0] = u[0] ^ v[0]; \ - w[1] = u[1] ^ v[1]; \ - w[2] = u[2] ^ v[2]; \ - w[3] = u[3] ^ v[3]; \ - w[4] = u[4] ^ v[4]; \ - w[5] = u[5] ^ v[5]; \ - w[6] = u[6] ^ v[6]; \ - w[7] = u[7] ^ v[7]; - -#define P(k,w) \ - k[0] = ((w[0] & 0x000000ff) << 0) \ - | ((w[2] & 0x000000ff) << 8) \ - | ((w[4] & 0x000000ff) << 16) \ - | ((w[6] & 0x000000ff) << 24); \ - k[1] = ((w[0] & 0x0000ff00) >> 8) \ - | ((w[2] & 0x0000ff00) >> 0) \ - | ((w[4] & 0x0000ff00) << 8) \ - | ((w[6] & 0x0000ff00) << 16); \ - k[2] = ((w[0] & 0x00ff0000) >> 16) \ - | ((w[2] 
& 0x00ff0000) >> 8) \ - | ((w[4] & 0x00ff0000) << 0) \ - | ((w[6] & 0x00ff0000) << 8); \ - k[3] = ((w[0] & 0xff000000) >> 24) \ - | ((w[2] & 0xff000000) >> 16) \ - | ((w[4] & 0xff000000) >> 8) \ - | ((w[6] & 0xff000000) >> 0); \ - k[4] = ((w[1] & 0x000000ff) << 0) \ - | ((w[3] & 0x000000ff) << 8) \ - | ((w[5] & 0x000000ff) << 16) \ - | ((w[7] & 0x000000ff) << 24); \ - k[5] = ((w[1] & 0x0000ff00) >> 8) \ - | ((w[3] & 0x0000ff00) >> 0) \ - | ((w[5] & 0x0000ff00) << 8) \ - | ((w[7] & 0x0000ff00) << 16); \ - k[6] = ((w[1] & 0x00ff0000) >> 16) \ - | ((w[3] & 0x00ff0000) >> 8) \ - | ((w[5] & 0x00ff0000) << 0) \ - | ((w[7] & 0x00ff0000) << 8); \ - k[7] = ((w[1] & 0xff000000) >> 24) \ - | ((w[3] & 0xff000000) >> 16) \ - | ((w[5] & 0xff000000) >> 8) \ - | ((w[7] & 0xff000000) >> 0); - -#define A(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0] ^ x[2]; \ - r = x[1] ^ x[3]; \ - x[0] = x[2]; \ - x[1] = x[3]; \ - x[2] = x[4]; \ - x[3] = x[5]; \ - x[4] = x[6]; \ - x[5] = x[7]; \ - x[6] = l; \ - x[7] = r; \ -} - -#define AA(x) \ -{ \ - u32x l; \ - u32x r; \ - l = x[0]; \ - r = x[2]; \ - x[0] = x[4]; \ - x[2] = x[6]; \ - x[4] = l ^ r; \ - x[6] = x[0] ^ r; \ - l = x[1]; \ - r = x[3]; \ - x[1] = x[5]; \ - x[3] = x[7]; \ - x[5] = l ^ r; \ - x[7] = x[1] ^ r; \ -} - -#define C(x) \ - x[0] ^= 0xff00ff00; \ - x[1] ^= 0xff00ff00; \ - x[2] ^= 0x00ff00ff; \ - x[3] ^= 0x00ff00ff; \ - x[4] ^= 0x00ffff00; \ - x[5] ^= 0xff0000ff; \ - x[6] ^= 0x000000ff; \ - x[7] ^= 0xff00ffff; - -#define SHIFT12(u,m,s) \ - u[0] = m[0] ^ s[6]; \ - u[1] = m[1] ^ s[7]; \ - u[2] = m[2] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0x0000ffff) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] >> 16); \ - u[3] = m[3] ^ (s[0] & 0x0000ffff) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 
0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[4] = m[4] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[1] & 0xffff0000) \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[5] = m[5] ^ (s[0] << 16) \ - ^ (s[0] >> 16) \ - ^ (s[0] & 0xffff0000) \ - ^ (s[1] & 0x0000ffff) \ - ^ s[2] \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0xffff0000) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); \ - u[6] = m[6] ^ s[0] \ - ^ (s[1] >> 16) \ - ^ (s[2] << 16) \ - ^ s[3] \ - ^ (s[3] >> 16) \ - ^ (s[4] << 16) \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ s[6] \ - ^ (s[6] << 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] << 16); \ - u[7] = m[7] ^ (s[0] & 0xffff0000) \ - ^ (s[0] << 16) \ - ^ (s[1] & 0x0000ffff) \ - ^ (s[1] << 16) \ - ^ (s[2] >> 16) \ - ^ (s[3] << 16) \ - ^ s[4] \ - ^ (s[4] >> 16) \ - ^ (s[5] << 16) \ - ^ (s[5] >> 16) \ - ^ (s[6] >> 16) \ - ^ (s[7] & 0x0000ffff) \ - ^ (s[7] << 16) \ - ^ (s[7] >> 16); - -#define SHIFT16(h,v,u) \ - v[0] = h[0] ^ (u[1] << 16) \ - ^ (u[0] >> 16); \ - v[1] = h[1] ^ (u[2] << 16) \ - ^ (u[1] >> 16); \ - v[2] = h[2] ^ (u[3] << 16) \ - ^ (u[2] >> 16); \ - v[3] = h[3] ^ (u[4] << 16) \ - ^ (u[3] >> 16); \ - v[4] = h[4] ^ (u[5] << 16) \ - ^ (u[4] >> 16); \ - v[5] = h[5] ^ (u[6] << 16) \ - ^ (u[5] >> 16); \ - v[6] = h[6] ^ (u[7] << 16) \ - ^ (u[6] >> 16); \ - v[7] = h[7] ^ (u[0] & 0xffff0000) \ - ^ (u[0] << 16) \ - ^ (u[7] >> 16) \ - ^ (u[1] & 0xffff0000) \ - ^ (u[1] << 16) \ - ^ (u[6] << 16) \ - ^ (u[7] & 0xffff0000); - -#define SHIFT61(h,v) \ - h[0] = (v[0] & 0xffff0000) \ - ^ (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ 
v[5] \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0x0000ffff); \ - h[1] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0x0000ffff) \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ v[6] \ - ^ (v[7] & 0xffff0000) \ - ^ (v[7] >> 16); \ - h[2] = (v[0] & 0x0000ffff) \ - ^ (v[0] << 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[6] \ - ^ (v[6] >> 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16); \ - h[3] = (v[0] << 16) \ - ^ (v[0] >> 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[7] & 0x0000ffff) \ - ^ (v[7] >> 16); \ - h[4] = (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ v[1] \ - ^ (v[2] >> 16) \ - ^ v[2] \ - ^ (v[3] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ (v[7] << 16); \ - h[5] = (v[0] << 16) \ - ^ (v[0] & 0xffff0000) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[1] & 0xffff0000) \ - ^ (v[2] << 16) \ - ^ v[2] \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ (v[4] >> 16) \ - ^ v[4] \ - ^ (v[5] << 16) \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ (v[7] >> 16) \ - ^ (v[7] & 0xffff0000); \ - h[6] = v[0] \ - ^ v[2] \ - ^ (v[2] >> 16) \ - ^ v[3] \ - ^ (v[3] << 16) \ - ^ v[4] \ - ^ (v[4] >> 16) \ - ^ (v[5] << 16) \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ v[6] \ - ^ (v[7] << 16) \ - ^ v[7]; \ - h[7] = v[0] \ - ^ (v[0] >> 16) \ - ^ (v[1] << 16) \ - ^ (v[1] >> 16) \ - ^ (v[2] << 16) \ - ^ (v[3] >> 16) \ - ^ v[3] \ - ^ (v[4] << 16) \ - ^ v[4] \ - ^ (v[5] >> 16) \ - ^ v[5] \ - ^ (v[6] << 16) \ - ^ (v[6] >> 16) \ - ^ 
(v[7] << 16) \ - ^ v[7]; - -#define PASS0(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 0, t); \ - A (u); \ - AA (v); \ -} - -#define PASS2(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 2, t); \ - A (u); \ - C (u); \ - AA (v); \ -} - -#define PASS4(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 4, t); \ - A (u); \ - AA (v); \ -} - -#define PASS6(h,s,u,v,t) \ -{ \ - u32x k[8]; \ - u32x w[8]; \ - X (w, u, v); \ - P (k, w); \ - R (k, h, s, 6, t); \ -} - -__device__ __shared__ u32 s_tables[4][256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m06900m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 w14 = pw_len * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = 
w1[3]; - - u32x state[16]; - - state[ 0] = 0; - state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - 
data[2] = state[10]; - data[3] = state[11]; - data[4] = state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m06900s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 w14 = pw_len * 8; - - /** - * digest - */ - - 
const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x data[8]; - - data[0] = w0[0]; - data[1] = w0[1]; - data[2] = w0[2]; - data[3] = w0[3]; - data[4] = w1[0]; - data[5] = w1[1]; - data[6] = w1[2]; - data[7] = w1[3]; - - u32x state[16]; - - state[ 0] = 0; - state[ 1] = 0; - state[ 2] = 0; - state[ 3] = 0; - state[ 4] = 0; - state[ 5] = 0; - state[ 6] = 0; - state[ 7] = 0; - state[ 8] = data[0]; - state[ 9] = data[1]; - state[10] = data[2]; - state[11] = data[3]; - state[12] = data[4]; - state[13] = data[5]; - state[14] = data[6]; - state[15] = data[7]; - - u32x state_m[8]; - u32x data_m[8]; - - /* gost1 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - u32x tmp[8]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - data[0] = w14; - data[1] = 0; - data[2] = 0; - data[3] = 0; - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - /* gost2 */ - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = 
state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* gost3 */ - - data[0] = state[ 8]; - data[1] = state[ 9]; - data[2] = state[10]; - data[3] = state[11]; - data[4] = state[12]; - data[5] = state[13]; - data[6] = state[14]; - data[7] = state[15]; - - state_m[0] = state[0]; - state_m[1] = state[1]; - state_m[2] = state[2]; - state_m[3] = state[3]; - state_m[4] = state[4]; - state_m[5] = state[5]; - state_m[6] = state[6]; - state_m[7] = state[7]; - - data_m[0] = data[0]; - data_m[1] = data[1]; - data_m[2] = data[2]; - data_m[3] = data[3]; - data_m[4] = data[4]; - data_m[5] = data[5]; - data_m[6] = data[6]; - data_m[7] = data[7]; - - PASS0 (state, tmp, state_m, data_m, s_tables); - PASS2 (state, tmp, state_m, data_m, s_tables); - PASS4 (state, tmp, state_m, data_m, s_tables); - PASS6 (state, tmp, state_m, data_m, s_tables); - - SHIFT12 (state_m, data, tmp); - SHIFT16 (state, data_m, state_m); - SHIFT61 (state, data_m); - - /* store */ - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, 
const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06900m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - 
- /** - * main - */ - - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - s_tables[0][lid] = c_tables[0][lid]; - s_tables[1][lid] = c_tables[1][lid]; - s_tables[2][lid] = c_tables[2][lid]; - s_tables[3][lid] = c_tables[3][lid]; - 
- __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m06900s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m06900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07100.cu b/nv/m07100.cu deleted file mode 100644 index 79e625b..0000000 --- a/nv/m07100.cu +++ /dev/null @@ -1,503 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PBKDF2_SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ 
u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - 
SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_run (const u64 w1[16], const 
u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_init (u64 w[16], u64 ipad[8], u64 opad[8]) -{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; - w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 
0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07100_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha512_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = swap_workaround (pws[gid].i[ 0]); - w0[1] = swap_workaround (pws[gid].i[ 1]); - w0[2] = swap_workaround (pws[gid].i[ 2]); - w0[3] = swap_workaround (pws[gid].i[ 3]); - - u32x w1[4]; - - w1[0] = swap_workaround (pws[gid].i[ 4]); - w1[1] = swap_workaround (pws[gid].i[ 5]); - w1[2] = swap_workaround (pws[gid].i[ 6]); - w1[3] = swap_workaround (pws[gid].i[ 7]); - - u32x w2[4]; - - w2[0] = swap_workaround (pws[gid].i[ 8]); - w2[1] = swap_workaround (pws[gid].i[ 9]); - w2[2] = swap_workaround (pws[gid].i[10]); - w2[3] = swap_workaround (pws[gid].i[11]); - - u32x w3[4]; - - w3[0] = swap_workaround (pws[gid].i[12]); - w3[1] = swap_workaround (pws[gid].i[13]); - w3[2] = swap_workaround (pws[gid].i[14]); - w3[3] = swap_workaround 
(pws[gid].i[15]); - - /** - * salt - */ - - u64 esalt_buf[16]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - esalt_buf[ 0] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1])); - esalt_buf[ 1] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3])); - esalt_buf[ 2] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5])); - esalt_buf[ 3] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7])); - esalt_buf[ 4] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]), swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9])); - esalt_buf[ 5] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[10]), swap_workaround (esalt_bufs[salt_pos].salt_buf[11])); - esalt_buf[ 6] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[12]), swap_workaround (esalt_bufs[salt_pos].salt_buf[13])); - esalt_buf[ 7] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[14]), swap_workaround (esalt_bufs[salt_pos].salt_buf[15])); - esalt_buf[ 8] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[16]), swap_workaround (esalt_bufs[salt_pos].salt_buf[17])); - esalt_buf[ 9] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[18]), swap_workaround (esalt_bufs[salt_pos].salt_buf[19])); - esalt_buf[10] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[20]), swap_workaround (esalt_bufs[salt_pos].salt_buf[21])); - esalt_buf[11] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[22]), swap_workaround (esalt_bufs[salt_pos].salt_buf[23])); - esalt_buf[12] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[24]), swap_workaround (esalt_bufs[salt_pos].salt_buf[25])); - esalt_buf[13] = hl32_to_64 (swap_workaround (esalt_bufs[salt_pos].salt_buf[26]), swap_workaround 
(esalt_bufs[salt_pos].salt_buf[27])); - esalt_buf[14] = 0; - esalt_buf[15] = (128 + salt_len + 4) * 8; - - u64 w[16]; - - w[ 0] = hl32_to_64 (w0[0], w0[1]); - w[ 1] = hl32_to_64 (w0[2], w0[3]); - w[ 2] = hl32_to_64 (w1[0], w1[1]); - w[ 3] = hl32_to_64 (w1[2], w1[3]); - w[ 4] = hl32_to_64 (w2[0], w2[1]); - w[ 5] = hl32_to_64 (w2[2], w2[3]); - w[ 6] = hl32_to_64 (w3[0], w3[1]); - w[ 7] = hl32_to_64 (w3[2], w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - u64 dgst[8]; - - hmac_run (esalt_buf, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07100_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha512_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < 8; i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = 
dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07100_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha512_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u64x a = tmps[gid].out[0]; - 
const u64x b = tmps[gid].out[1]; - - const u32x r0 = l32_from_64 (a); - const u32x r1 = h32_from_64 (a); - const u32x r2 = l32_from_64 (b); - const u32x r3 = h32_from_64 (b); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m07300_a0.cu b/nv/m07300_a0.cu deleted file mode 100644 index 7048ef2..0000000 --- a/nv/m07300_a0.cu +++ /dev/null @@ -1,621 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP 
(SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ 
w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t 
^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ 
wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, 
opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - 
pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = 
rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, 
const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - 
- hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off 
+ 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07300_a1.cu 
b/nv/m07300_a1.cu deleted file mode 100644 index 28db29a..0000000 --- a/nv/m07300_a1.cu +++ /dev/null @@ -1,727 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - 
SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, 
digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = 
pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - 
- u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = 
rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; 
- - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off 
+ 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07300_a3.cu b/nv/m07300_a3.cu deleted file mode 100644 index 4966609..0000000 --- a/nv/m07300_a3.cu +++ /dev/null @@ -1,791 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, 
B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ 
w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ 
w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ 
wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - 
digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m07300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - 
w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = 
rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m07300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 esalt_len = rakp_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * pads - */ - - u32x w0_t[4]; - - w0_t[0] = 
w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - int esalt_size = esalt_len; - - int esalt_left; - int esalt_off; - - for (esalt_left = esalt_size, esalt_off = 0; esalt_left >= 56; esalt_left -= 64, esalt_off += 16) - { - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 14]; - w3_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 15]; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, ipad); - } - - w0_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 0]; - w0_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 1]; - w0_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 2]; - w0_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 3]; - w1_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 4]; - w1_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 5]; - w1_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 6]; - w1_t[3] = 
rakp_bufs[salt_pos].salt_buf[esalt_off + 7]; - w2_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 8]; - w2_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 9]; - w2_t[2] = rakp_bufs[salt_pos].salt_buf[esalt_off + 10]; - w2_t[3] = rakp_bufs[salt_pos].salt_buf[esalt_off + 11]; - w3_t[0] = rakp_bufs[salt_pos].salt_buf[esalt_off + 12]; - w3_t[1] = rakp_bufs[salt_pos].salt_buf[esalt_off + 13]; - w3_t[2] = 0; - w3_t[3] = (64 + esalt_size) * 8; - - u32x digest[5]; - - hmac_sha1_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - 
* main - */ - - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ 
(256, 1) m07300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const 
u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const rakp_t *rakp_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, rakp_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m07400.cu b/nv/m07400.cu deleted file mode 100644 index 9412e40..0000000 --- a/nv/m07400.cu +++ /dev/null @@ -1,1283 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define 
VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ static void sha256_transform (const u32x w[16], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w[ 0]); - u32x w1_t = swap_workaround (w[ 1]); - u32x w2_t = swap_workaround (w[ 2]); - u32x w3_t = swap_workaround (w[ 3]); - u32x w4_t = swap_workaround (w[ 4]); - u32x w5_t = swap_workaround (w[ 5]); - u32x w6_t = swap_workaround (w[ 6]); - u32x w7_t = swap_workaround (w[ 7]); - u32x w8_t = swap_workaround (w[ 8]); - u32x w9_t = swap_workaround (w[ 9]); - u32x wa_t = swap_workaround (w[10]); - u32x wb_t = swap_workaround (w[11]); - u32x wc_t = swap_workaround (w[12]); - u32x wd_t = swap_workaround (w[13]); - u32x we_t = swap_workaround (w[14]); - u32x wf_t = swap_workaround (w[15]); - - #define 
ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, 
a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void sha256_transform_no14 (const u32x w[16], u32x digest[8]) -{ - u32x w_t[16]; - - w_t[ 0] = w[ 0]; - w_t[ 1] = w[ 1]; - w_t[ 2] = w[ 2]; - w_t[ 3] = w[ 3]; - w_t[ 4] = w[ 4]; - w_t[ 5] = w[ 5]; - w_t[ 6] = w[ 6]; - w_t[ 7] = w[ 7]; - w_t[ 8] = w[ 8]; - w_t[ 9] = w[ 9]; - w_t[10] = w[10]; - w_t[11] = w[11]; - w_t[12] = w[12]; - w_t[13] = w[13]; - w_t[14] = 0; - w_t[15] = w[15]; - - sha256_transform (w_t, digest); -} - -__device__ static void init_ctx (u32x digest[8]) -{ - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; -} - -__device__ static void bzero16 (u32x block[16]) -{ - block[ 0] = 0; - block[ 1] = 0; - block[ 2] = 0; - block[ 3] = 0; - block[ 4] = 0; - block[ 5] = 0; - block[ 6] = 0; - block[ 7] = 0; - block[ 8] = 0; - block[ 9] = 0; - block[10] = 0; - block[11] = 0; - block[12] = 0; - block[13] = 0; - block[14] = 0; - block[15] = 0; -} - -__device__ static void bswap8 (u32x block[16]) -{ - block[ 0] = swap_workaround (block[ 0]); - block[ 1] = swap_workaround (block[ 1]); - block[ 2] = swap_workaround (block[ 2]); - block[ 3] = swap_workaround (block[ 3]); - block[ 4] = swap_workaround (block[ 4]); - block[ 5] = swap_workaround (block[ 5]); - block[ 6] = swap_workaround (block[ 6]); - block[ 7] = swap_workaround (block[ 7]); -} - 
-__device__ static u32 memcat16 (u32x block[16], const u32 block_len, const u32x append[4], const u32 append_len) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - block[ 3] = tmp3; - block[ 4] = tmp4; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - block[ 4] = tmp3; - block[ 5] = tmp4; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; - block[ 5] = tmp3; - block[ 6] = tmp4; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - block[ 6] = tmp3; - block[ 7] = tmp4; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - block[ 7] = tmp3; 
- block[ 8] = tmp4; - break; - case 5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - block[ 8] = tmp3; - block[ 9] = tmp4; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - block[ 9] = tmp3; - block[10] = tmp4; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - block[10] = tmp3; - block[11] = tmp4; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - block[11] = tmp3; - block[12] = tmp4; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - block[12] = tmp3; - block[13] = tmp4; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - block[13] = tmp3; - block[14] = tmp4; - break; - case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - block[14] = tmp3; - block[15] = tmp4; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - block[15] = tmp3; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - break; - case 14: block[14] |= tmp0; - block[15] = tmp1; - break; - case 15: block[15] |= tmp0; - break; - } - - u32 new_len = block_len + append_len; - - return new_len; -} - -__device__ static u32 memcat16c (u32x block[16], const u32 block_len, const u32x append[4], const u32 append_len, u32x digest[8]) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = 
append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - u32x carry[4] = { 0, 0, 0, 0 }; - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - block[ 3] = tmp3; - block[ 4] = tmp4; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - block[ 4] = tmp3; - block[ 5] = tmp4; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; - block[ 5] = tmp3; - block[ 6] = tmp4; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - block[ 6] = tmp3; - block[ 7] = tmp4; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - block[ 7] = tmp3; - block[ 8] = tmp4; - break; - case 5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - block[ 8] = tmp3; - block[ 9] = tmp4; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - block[ 9] = tmp3; - block[10] = tmp4; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - block[10] = tmp3; - block[11] = tmp4; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - block[11] = tmp3; - block[12] = tmp4; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - block[12] = tmp3; - block[13] = tmp4; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - block[13] = tmp3; - block[14] = tmp4; - break; - 
case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - block[14] = tmp3; - block[15] = tmp4; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - block[15] = tmp3; - carry[ 0] = tmp4; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - carry[ 0] = tmp3; - carry[ 1] = tmp4; - break; - case 14: block[14] |= tmp0; - block[15] = tmp1; - carry[ 0] = tmp2; - carry[ 1] = tmp3; - carry[ 2] = tmp4; - break; - case 15: block[15] |= tmp0; - carry[ 0] = tmp1; - carry[ 1] = tmp2; - carry[ 2] = tmp3; - carry[ 3] = tmp4; - break; - } - - u32 new_len = block_len + append_len; - - if (new_len >= 64) - { - new_len -= 64; - - sha256_transform (block, digest); - - bzero16 (block); - - block[0] = carry[0]; - block[1] = carry[1]; - block[2] = carry[2]; - block[3] = carry[3]; - } - - return new_len; -} - -__device__ static u32 memcat20 (u32x block[20], const u32 block_len, const u32x append[4], const u32 append_len) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = 
append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - block[ 3] = tmp3; - block[ 4] = tmp4; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - block[ 4] = tmp3; - block[ 5] = tmp4; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; - block[ 5] = tmp3; - block[ 6] = tmp4; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - block[ 6] = tmp3; - block[ 7] = tmp4; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - block[ 7] = tmp3; - block[ 8] = tmp4; - break; - case 5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - block[ 8] = tmp3; - block[ 9] = tmp4; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - block[ 9] = tmp3; - block[10] = tmp4; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - block[10] = tmp3; - block[11] = tmp4; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - block[11] = tmp3; - block[12] = tmp4; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - block[12] = tmp3; - block[13] = tmp4; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - block[13] = tmp3; - block[14] = tmp4; - break; - case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - block[14] = tmp3; - block[15] = tmp4; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - block[15] = tmp3; - block[16] = tmp4; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - block[16] = tmp3; - block[17] = tmp4; - break; - case 14: block[14] |= tmp0; - 
block[15] = tmp1; - block[16] = tmp2; - block[17] = tmp3; - block[18] = tmp4; - break; - case 15: block[15] |= tmp0; - block[16] = tmp1; - block[17] = tmp2; - block[18] = tmp3; - block[19] = tmp4; - break; - } - - return block_len + append_len; -} - -__device__ static u32 memcat20_x80 (u32x block[20], const u32 block_len, const u32x append[4], const u32 append_len) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], 0x80, selector); - - #else - - const u32 mod = block_len & 3; - - switch (mod) - { - case 0: tmp0 = append[0]; - tmp1 = append[1]; - tmp2 = append[2]; - tmp3 = append[3]; - tmp4 = 0; - break; - case 1: tmp0 = append[0] << 8; - tmp1 = append[0] >> 24 | append[1] << 8; - tmp2 = append[1] >> 24 | append[2] << 8; - tmp3 = append[2] >> 24 | append[3] << 8; - tmp4 = append[3] >> 24; - break; - case 2: tmp0 = append[0] << 16; - tmp1 = append[0] >> 16 | append[1] << 16; - tmp2 = append[1] >> 16 | append[2] << 16; - tmp3 = append[2] >> 16 | append[3] << 16; - tmp4 = append[3] >> 16; - break; - case 3: tmp0 = append[0] << 24; - tmp1 = append[0] >> 8 | append[1] << 24; - tmp2 = append[1] >> 8 | append[2] << 24; - tmp3 = append[2] >> 8 | append[3] << 24; - tmp4 = append[3] >> 8; - break; - } - - #endif - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - block[ 3] = tmp3; - block[ 4] = tmp4; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - block[ 4] = tmp3; - block[ 5] = tmp4; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; 
- block[ 5] = tmp3; - block[ 6] = tmp4; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - block[ 6] = tmp3; - block[ 7] = tmp4; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - block[ 7] = tmp3; - block[ 8] = tmp4; - break; - case 5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - block[ 8] = tmp3; - block[ 9] = tmp4; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - block[ 9] = tmp3; - block[10] = tmp4; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - block[10] = tmp3; - block[11] = tmp4; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - block[11] = tmp3; - block[12] = tmp4; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - block[12] = tmp3; - block[13] = tmp4; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - block[13] = tmp3; - block[14] = tmp4; - break; - case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - block[14] = tmp3; - block[15] = tmp4; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - block[15] = tmp3; - block[16] = tmp4; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - block[16] = tmp3; - block[17] = tmp4; - break; - case 14: block[14] |= tmp0; - block[15] = tmp1; - block[16] = tmp2; - block[17] = tmp3; - block[18] = tmp4; - break; - case 15: block[15] |= tmp0; - block[16] = tmp1; - block[17] = tmp2; - block[18] = tmp3; - block[19] = tmp4; - break; - } - - return block_len + append_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha256crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * buffers - */ - - u32 block_len; // never reaches > 64 - u32 transform_len; // required for w[15] = len * 8 - - u32x block[16]; - - u32x alt_result[8]; - u32x p_bytes[8]; - u32x s_bytes[8]; - - /* Prepare for the real work. */ - - block_len = 0; - - bzero16 (block); - - /* Add key. */ - - block_len = memcat16 (block, block_len, w0, pw_len); - - /* Add salt. */ - - block_len = memcat16 (block, block_len, salt_buf, salt_len); - - /* Add key again. 
*/ - - block_len = memcat16 (block, block_len, w0, pw_len); - - append_0x80_4 (block, block_len); - - block[15] = swap_workaround (block_len * 8); - - init_ctx (alt_result); - - sha256_transform (block, alt_result); - - bswap8 (alt_result); - - block_len = 0; - - bzero16 (block); - - u32x alt_result_tmp[8]; - - alt_result_tmp[0] = alt_result[0]; - alt_result_tmp[1] = alt_result[1]; - alt_result_tmp[2] = alt_result[2]; - alt_result_tmp[3] = alt_result[3]; - alt_result_tmp[4] = 0; - alt_result_tmp[5] = 0; - alt_result_tmp[6] = 0; - alt_result_tmp[7] = 0; - - truncate_block (alt_result_tmp, pw_len); - - /* Add the key string. */ - - block_len = memcat16 (block, block_len, w0, pw_len); - - /* The last part is the salt string. This must be at most 8 - characters and it ends at the first `$' character (for - compatibility with existing implementations). */ - - block_len = memcat16 (block, block_len, salt_buf, salt_len); - - /* Now get result of this (32 bytes) and add it to the other - context. */ - - block_len = memcat16 (block, block_len, alt_result_tmp, pw_len); - - transform_len = block_len; - - /* Take the binary representation of the length of the key and for every - 1 add the alternate sum, for every 0 the key. 
*/ - - alt_result_tmp[0] = alt_result[0]; - alt_result_tmp[1] = alt_result[1]; - alt_result_tmp[2] = alt_result[2]; - alt_result_tmp[3] = alt_result[3]; - alt_result_tmp[4] = alt_result[4]; - alt_result_tmp[5] = alt_result[5]; - alt_result_tmp[6] = alt_result[6]; - alt_result_tmp[7] = alt_result[7]; - - init_ctx (alt_result); - - for (u32 j = pw_len; j; j >>= 1) - { - if (j & 1) - { - block_len = memcat16c (block, block_len, &alt_result_tmp[0], 16, alt_result); - block_len = memcat16c (block, block_len, &alt_result_tmp[4], 16, alt_result); - - transform_len += 32; - } - else - { - block_len = memcat16c (block, block_len, w0, pw_len, alt_result); - - transform_len += pw_len; - } - } - - append_0x80_4 (block, block_len); - - if (block_len >= 56) - { - sha256_transform (block, alt_result); - - bzero16 (block); - } - - block[15] = swap_workaround (transform_len * 8); - - sha256_transform (block, alt_result); - - bswap8 (alt_result); - - tmps[gid].alt_result[0] = alt_result[0]; - tmps[gid].alt_result[1] = alt_result[1]; - tmps[gid].alt_result[2] = alt_result[2]; - tmps[gid].alt_result[3] = alt_result[3]; - tmps[gid].alt_result[4] = alt_result[4]; - tmps[gid].alt_result[5] = alt_result[5]; - tmps[gid].alt_result[6] = alt_result[6]; - tmps[gid].alt_result[7] = alt_result[7]; - - /* Start computation of P byte sequence. */ - - block_len = 0; - - transform_len = 0; - - bzero16 (block); - - /* For every character in the password add the entire password. */ - - init_ctx (p_bytes); - - for (u32 j = 0; j < pw_len; j++) - { - block_len = memcat16c (block, block_len, w0, pw_len, p_bytes); - - transform_len += pw_len; - } - - /* Finish the digest. 
*/ - - append_0x80_4 (block, block_len); - - if (block_len >= 56) - { - sha256_transform (block, p_bytes); - - bzero16 (block); - } - - block[15] = swap_workaround (transform_len * 8); - - sha256_transform (block, p_bytes); - - bswap8 (p_bytes); - - truncate_block (p_bytes, pw_len); - - tmps[gid].p_bytes[0] = p_bytes[0]; - tmps[gid].p_bytes[1] = p_bytes[1]; - tmps[gid].p_bytes[2] = p_bytes[2]; - tmps[gid].p_bytes[3] = p_bytes[3]; - - /* Start computation of S byte sequence. */ - - block_len = 0; - - transform_len = 0; - - bzero16 (block); - - /* For every character in the password add the entire password. */ - - init_ctx (s_bytes); - - for (u32 j = 0; j < 16 + (alt_result[0] & 0xff); j++) - { - block_len = memcat16c (block, block_len, salt_buf, salt_len, s_bytes); - - transform_len += salt_len; - } - - /* Finish the digest. */ - - append_0x80_4 (block, block_len); - - if (block_len >= 56) - { - sha256_transform (block, s_bytes); - - bzero16 (block); - } - - block[15] = swap_workaround (transform_len * 8); - - sha256_transform (block, s_bytes); - - bswap8 (s_bytes); - - truncate_block (s_bytes, salt_len); - - tmps[gid].s_bytes[0] = s_bytes[0]; - tmps[gid].s_bytes[1] = s_bytes[1]; - tmps[gid].s_bytes[2] = s_bytes[2]; - tmps[gid].s_bytes[3] = s_bytes[3]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07400_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, sha256crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - u32x p_bytes[4]; - - p_bytes[0] = tmps[gid].p_bytes[0]; - p_bytes[1] = tmps[gid].p_bytes[1]; - p_bytes[2] = tmps[gid].p_bytes[2]; - p_bytes[3] = tmps[gid].p_bytes[3]; - - u32x p_bytes_x80[4]; - - p_bytes_x80[0] = tmps[gid].p_bytes[0]; - p_bytes_x80[1] = tmps[gid].p_bytes[1]; - p_bytes_x80[2] = tmps[gid].p_bytes[2]; - p_bytes_x80[3] = tmps[gid].p_bytes[3]; - - append_0x80_1 (p_bytes_x80, pw_len); - - u32x s_bytes[4]; - - s_bytes[0] = tmps[gid].s_bytes[0]; - s_bytes[1] = tmps[gid].s_bytes[1]; - s_bytes[2] = tmps[gid].s_bytes[2]; - s_bytes[3] = tmps[gid].s_bytes[3]; - - u32x alt_result[8]; - - alt_result[0] = tmps[gid].alt_result[0]; - alt_result[1] = tmps[gid].alt_result[1]; - alt_result[2] = tmps[gid].alt_result[2]; - alt_result[3] = tmps[gid].alt_result[3]; - alt_result[4] = tmps[gid].alt_result[4]; - alt_result[5] = tmps[gid].alt_result[5]; - alt_result[6] = tmps[gid].alt_result[6]; - alt_result[7] = tmps[gid].alt_result[7]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - /* Repeatedly run the collected hash value through SHA256 to burn - CPU cycles. */ - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - u32x tmp[8]; - - init_ctx (tmp); - - u32x block[32]; - - bzero16 (&block[ 0]); - bzero16 (&block[16]); - - u32 block_len = 0; - - const u32 j1 = (j & 1) ? 1 : 0; - const u32 j3 = (j % 3) ? 1 : 0; - const u32 j7 = (j % 7) ? 
1 : 0; - - if (j1) - { - block[0] = p_bytes[0]; - block[1] = p_bytes[1]; - block[2] = p_bytes[2]; - block[3] = p_bytes[3]; - - block_len = pw_len; - } - else - { - block[0] = alt_result[0]; - block[1] = alt_result[1]; - block[2] = alt_result[2]; - block[3] = alt_result[3]; - block[4] = alt_result[4]; - block[5] = alt_result[5]; - block[6] = alt_result[6]; - block[7] = alt_result[7]; - - block_len = 32; - } - - if (j3) - { - block_len = memcat20 (block, block_len, s_bytes, salt_len); - } - - if (j7) - { - block_len = memcat20 (block, block_len, p_bytes, pw_len); - } - - if (j1) - { - block_len = memcat20 (block, block_len, &alt_result[0], 16); - block_len = memcat20_x80 (block, block_len, &alt_result[4], 16); - } - else - { - block_len = memcat20 (block, block_len, p_bytes_x80, pw_len); - } - - if (block_len >= 56) - { - sha256_transform (block, tmp); - - block[ 0] = block[16]; - block[ 1] = block[17]; - block[ 2] = block[18]; - block[ 3] = block[19]; - block[ 4] = 0; - block[ 5] = 0; - block[ 6] = 0; - block[ 7] = 0; - block[ 8] = 0; - block[ 9] = 0; - block[10] = 0; - block[11] = 0; - block[12] = 0; - block[13] = 0; - block[14] = 0; - block[15] = 0; - } - - block[15] = swap_workaround (block_len * 8); - - sha256_transform_no14 (block, tmp); - - bswap8 (tmp); - - alt_result[0] = tmp[0]; - alt_result[1] = tmp[1]; - alt_result[2] = tmp[2]; - alt_result[3] = tmp[3]; - alt_result[4] = tmp[4]; - alt_result[5] = tmp[5]; - alt_result[6] = tmp[6]; - alt_result[7] = tmp[7]; - } - - tmps[gid].alt_result[0] = alt_result[0]; - tmps[gid].alt_result[1] = alt_result[1]; - tmps[gid].alt_result[2] = alt_result[2]; - tmps[gid].alt_result[3] = alt_result[3]; - tmps[gid].alt_result[4] = alt_result[4]; - tmps[gid].alt_result[5] = alt_result[5]; - tmps[gid].alt_result[6] = alt_result[6]; - tmps[gid].alt_result[7] = alt_result[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, sha256crypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].alt_result[0]; - const u32x r1 = tmps[gid].alt_result[1]; - const u32x r2 = tmps[gid].alt_result[2]; - const u32x r3 = tmps[gid].alt_result[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m07500_a0.cu b/nv/m07500_a0.cu deleted file mode 100644 index 7e2269e..0000000 --- a/nv/m07500_a0.cu +++ /dev/null @@ -1,813 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _KRB5PA_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -typedef struct -{ - u8 S[256]; - - u8 i; - u8 j; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY 
*rc4_key, const u32 data[4]) -{ - u32 i; - - for (i = 0; i < 256; i += 1) rc4_key->S[i] = i; - - u8 j = 0; - - for (i = 0; i < 256; i += 16) - { - u32 idx = i; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } - - rc4_key->i = 0; - rc4_key->j = 0; -} - -__device__ static u32 rc4_next_4 (RC4_KEY *rc4_key, const u32 ct) -{ - u8 idx; - - u32 xor4 = 0; - - u8 i = rc4_key->i; - u8 j = rc4_key->j; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + 
rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - rc4_key->i = i; - rc4_key->j = j; - - return ct ^ xor4; -} - -__device__ static int decrypt_and_check (RC4_KEY *rc4_key, u32 data[4], u32 timestamp_ct[8]) -{ - u32 pt; - - rc4_init_16 (rc4_key, data); - - pt = rc4_next_4 (rc4_key, timestamp_ct[0]); - pt = rc4_next_4 (rc4_key, timestamp_ct[1]); - pt = rc4_next_4 (rc4_key, timestamp_ct[2]); - pt = rc4_next_4 (rc4_key, timestamp_ct[3]); - - if ((pt & 0xffff0000) != 0x30320000) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[4]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[5]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[6]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - return 1; -} - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, 
c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - 
MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void 
hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, const u32 checksum[4], u32x digest[4]) -{ - /** - * pads - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // K=MD4(Little_indian(UNICODE(pwd)) - - append_0x80_2 (w0_t, w1_t, pw_len); - - make_unicode (w1_t, w2_t, w3_t); - make_unicode (w0_t, w0_t, w1_t); - - w3_t[2] = pw_len * 8 * 2; - w3_t[3] = 0; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = 1; - w0_t[1] = 0x80; - w0_t[2] = 0; - w0_t[3] = 0; - 
w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 4) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - // K3=MD5_HMAC(K1,checksum); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = checksum[0]; - w0_t[1] = checksum[1]; - w0_t[2] = checksum[2]; - w0_t[3] = checksum[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 16) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - __shared__ RC4_KEY rc4_keys[64]; - - const u32 lid = threadIdx.x; - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - 
/** - * base - */ - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 checksum[4]; - - checksum[0] = krb5pa_bufs[salt_pos].checksum[0]; - checksum[1] = krb5pa_bufs[salt_pos].checksum[1]; - checksum[2] = krb5pa_bufs[salt_pos].checksum[2]; - checksum[3] = krb5pa_bufs[salt_pos].checksum[3]; - - u32 timestamp_ct[8]; - - timestamp_ct[0] = krb5pa_bufs[salt_pos].timestamp[0]; - timestamp_ct[1] = krb5pa_bufs[salt_pos].timestamp[1]; - timestamp_ct[2] = krb5pa_bufs[salt_pos].timestamp[2]; - timestamp_ct[3] = krb5pa_bufs[salt_pos].timestamp[3]; - timestamp_ct[4] = krb5pa_bufs[salt_pos].timestamp[4]; - timestamp_ct[5] = krb5pa_bufs[salt_pos].timestamp[5]; - timestamp_ct[6] = krb5pa_bufs[salt_pos].timestamp[6]; - timestamp_ct[7] = krb5pa_bufs[salt_pos].timestamp[7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * kerberos - */ - - u32x digest[4]; - - kerb_prepare (w0, w1, out_len, checksum, digest); - - u32 tmp[4]; - - #ifdef VECT_SIZE1 - - tmp[0] = digest[0]; - tmp[1] = digest[1]; - tmp[2] = digest[2]; - tmp[3] = digest[3]; - - if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); - - d_return_buf[lid] = 1; - } - - #endif - 
} -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - __shared__ RC4_KEY rc4_keys[64]; - - const u32 lid = threadIdx.x; - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 checksum[4]; - - checksum[0] = krb5pa_bufs[salt_pos].checksum[0]; - checksum[1] = krb5pa_bufs[salt_pos].checksum[1]; - checksum[2] = krb5pa_bufs[salt_pos].checksum[2]; - checksum[3] = krb5pa_bufs[salt_pos].checksum[3]; - - u32 timestamp_ct[8]; - - timestamp_ct[0] = krb5pa_bufs[salt_pos].timestamp[0]; - timestamp_ct[1] = krb5pa_bufs[salt_pos].timestamp[1]; - timestamp_ct[2] = krb5pa_bufs[salt_pos].timestamp[2]; - timestamp_ct[3] = krb5pa_bufs[salt_pos].timestamp[3]; - timestamp_ct[4] = krb5pa_bufs[salt_pos].timestamp[4]; - timestamp_ct[5] = krb5pa_bufs[salt_pos].timestamp[5]; - timestamp_ct[6] = krb5pa_bufs[salt_pos].timestamp[6]; - timestamp_ct[7] = krb5pa_bufs[salt_pos].timestamp[7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = 
pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * kerberos - */ - - u32x digest[4]; - - kerb_prepare (w0, w1, out_len, checksum, digest); - - u32 tmp[4]; - - #ifdef VECT_SIZE1 - - tmp[0] = digest[0]; - tmp[1] = digest[1]; - tmp[2] = digest[2]; - tmp[3] = digest[3]; - - if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); - - d_return_buf[lid] = 1; - } - - #endif - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07500_a1.cu b/nv/m07500_a1.cu deleted file mode 100644 index bcfb40c..0000000 --- a/nv/m07500_a1.cu +++ /dev/null @@ -1,919 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _KRB5PA_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -__device__ __constant__ comb_t c_combs[1024]; - -typedef struct -{ - u8 S[256]; - - u8 i; - u8 j; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 i; - - for (i = 0; i < 256; i += 1) rc4_key->S[i] = i; - - u8 j = 0; - - for (i = 0; i < 256; i += 16) - { - u32 idx = i; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, 
idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } - - rc4_key->i = 0; - rc4_key->j = 0; -} - -__device__ static u32 rc4_next_4 (RC4_KEY *rc4_key, const u32 ct) -{ - u8 idx; - - u32 xor4 = 0; - - u8 i = rc4_key->i; - u8 j = rc4_key->j; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - rc4_key->i = i; - rc4_key->j = j; - - return ct ^ xor4; -} - -__device__ static int decrypt_and_check (RC4_KEY *rc4_key, u32 data[4], u32 timestamp_ct[8]) -{ - u32 pt; - - rc4_init_16 (rc4_key, data); - - pt = rc4_next_4 (rc4_key, timestamp_ct[0]); - pt = rc4_next_4 (rc4_key, timestamp_ct[1]); - pt = rc4_next_4 (rc4_key, timestamp_ct[2]); - pt = rc4_next_4 (rc4_key, timestamp_ct[3]); - - if ((pt & 0xffff0000) != 0x30320000) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[4]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) 
return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[5]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[6]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - return 1; -} - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, 
b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - 
u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x 
ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 
pw_len, const u32 checksum[4], u32x digest[4]) -{ - /** - * pads - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // K=MD4(Little_indian(UNICODE(pwd)) - - append_0x80_2 (w0_t, w1_t, pw_len); - - make_unicode (w1_t, w2_t, w3_t); - make_unicode (w0_t, w0_t, w1_t); - - w3_t[2] = pw_len * 8 * 2; - w3_t[3] = 0; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = 1; - w0_t[1] = 0x80; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 4) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - // K3=MD5_HMAC(K1,checksum); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = checksum[0]; - w0_t[1] = checksum[1]; - w0_t[2] = checksum[2]; - w0_t[3] = checksum[3]; - w1_t[0] = 
0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 16) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - __shared__ RC4_KEY rc4_keys[64]; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 checksum[4]; - - checksum[0] = 
krb5pa_bufs[salt_pos].checksum[0]; - checksum[1] = krb5pa_bufs[salt_pos].checksum[1]; - checksum[2] = krb5pa_bufs[salt_pos].checksum[2]; - checksum[3] = krb5pa_bufs[salt_pos].checksum[3]; - - u32 timestamp_ct[8]; - - timestamp_ct[0] = krb5pa_bufs[salt_pos].timestamp[0]; - timestamp_ct[1] = krb5pa_bufs[salt_pos].timestamp[1]; - timestamp_ct[2] = krb5pa_bufs[salt_pos].timestamp[2]; - timestamp_ct[3] = krb5pa_bufs[salt_pos].timestamp[3]; - timestamp_ct[4] = krb5pa_bufs[salt_pos].timestamp[4]; - timestamp_ct[5] = krb5pa_bufs[salt_pos].timestamp[5]; - timestamp_ct[6] = krb5pa_bufs[salt_pos].timestamp[6]; - timestamp_ct[7] = krb5pa_bufs[salt_pos].timestamp[7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | 
wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * kerberos - */ - - u32x digest[4]; - - kerb_prepare (w0, w1, pw_len, checksum, digest); - - u32 tmp[4]; - - #ifdef VECT_SIZE1 - - tmp[0] = digest[0]; - tmp[1] = digest[1]; - tmp[2] = digest[2]; - tmp[3] = digest[3]; - - if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); - - d_return_buf[lid] = 1; - } - - #endif - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - __shared__ RC4_KEY rc4_keys[64]; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 checksum[4]; - - checksum[0] = krb5pa_bufs[salt_pos].checksum[0]; - checksum[1] = krb5pa_bufs[salt_pos].checksum[1]; - checksum[2] = 
krb5pa_bufs[salt_pos].checksum[2]; - checksum[3] = krb5pa_bufs[salt_pos].checksum[3]; - - u32 timestamp_ct[8]; - - timestamp_ct[0] = krb5pa_bufs[salt_pos].timestamp[0]; - timestamp_ct[1] = krb5pa_bufs[salt_pos].timestamp[1]; - timestamp_ct[2] = krb5pa_bufs[salt_pos].timestamp[2]; - timestamp_ct[3] = krb5pa_bufs[salt_pos].timestamp[3]; - timestamp_ct[4] = krb5pa_bufs[salt_pos].timestamp[4]; - timestamp_ct[5] = krb5pa_bufs[salt_pos].timestamp[5]; - timestamp_ct[6] = krb5pa_bufs[salt_pos].timestamp[6]; - timestamp_ct[7] = krb5pa_bufs[salt_pos].timestamp[7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * kerberos - */ - - u32x digest[4]; - - kerb_prepare (w0, w1, 
pw_len, checksum, digest); - - u32 tmp[4]; - - #ifdef VECT_SIZE1 - - tmp[0] = digest[0]; - tmp[1] = digest[1]; - tmp[2] = digest[2]; - tmp[3] = digest[3]; - - if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); - - d_return_buf[lid] = 1; - } - - #endif - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ -} diff --git a/nv/m07500_a3.cu b/nv/m07500_a3.cu deleted file mode 100644 index 8336ce3..0000000 --- a/nv/m07500_a3.cu +++ /dev/null @@ -1,826 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _KRB5PA_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -__device__ __constant__ bf_t c_bfs[1024]; - -typedef struct -{ - u8 S[256]; - - u8 i; - u8 j; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 i; - - for (i = 0; i < 256; i += 1) rc4_key->S[i] = i; - - u8 j = 0; - - for (i = 0; i < 256; i += 16) - { - u32 idx = i; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, 
idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } - - rc4_key->i = 0; - rc4_key->j = 0; -} - -__device__ static u32 rc4_next_4 (RC4_KEY *rc4_key, const u32 ct) -{ - u8 idx; - - u32 xor4 = 0; - - u8 i = rc4_key->i; - u8 j = rc4_key->j; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - rc4_key->i = i; - rc4_key->j = j; - - return ct ^ xor4; -} - -__device__ static int decrypt_and_check (RC4_KEY *rc4_key, u32 data[4], u32 timestamp_ct[8]) -{ - u32 pt; - - rc4_init_16 (rc4_key, data); - - pt = rc4_next_4 (rc4_key, timestamp_ct[0]); - pt = rc4_next_4 (rc4_key, timestamp_ct[1]); - pt = rc4_next_4 (rc4_key, timestamp_ct[2]); - pt = rc4_next_4 (rc4_key, timestamp_ct[3]); - - if ((pt & 0xffff0000) != 0x30320000) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[4]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[5]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || 
((pt & 0xff) > '9')) return 0; - - pt = rc4_next_4 (rc4_key, timestamp_ct[6]); - - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; pt >>= 8; - if (((pt & 0xff) < '0') || ((pt & 0xff) > '9')) return 0; - - return 1; -} - -__device__ static void md4_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - 
MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, 
d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, 
w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 
0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -__device__ static void kerb_prepare (const u32x w0[4], const u32x w1[4], const u32 pw_len, const u32 checksum[4], u32x digest[4]) -{ - /** - * pads - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; 
- w3_t[2] = 0; - w3_t[3] = 0; - - // K=MD4(Little_indian(UNICODE(pwd)) - - append_0x80_2 (w0_t, w1_t, pw_len); - - make_unicode (w1_t, w2_t, w3_t); - make_unicode (w0_t, w0_t, w1_t); - - w3_t[2] = pw_len * 8 * 2; - w3_t[3] = 0; - - digest[0] = MD4M_A; - digest[1] = MD4M_B; - digest[2] = MD4M_C; - digest[3] = MD4M_D; - - md4_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // K1=MD5_HMAC(K,1); with 1 encoded as little indian on 4 bytes (01000000 in hexa); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - u32x ipad[4]; - u32x opad[4]; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = 1; - w0_t[1] = 0x80; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 4) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); - - // K3=MD5_HMAC(K1,checksum); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - hmac_md5_pad (w0_t, w1_t, w2_t, w3_t, ipad, opad); - - w0_t[0] = checksum[0]; - w0_t[1] = checksum[1]; - w0_t[2] = checksum[2]; - w0_t[3] = checksum[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (64 + 16) * 8; - w3_t[3] = 0; - - hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); -} - -__device__ static void m07500 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - __shared__ RC4_KEY rc4_keys[64]; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 checksum[4]; - - checksum[0] = krb5pa_bufs[salt_pos].checksum[0]; - checksum[1] = krb5pa_bufs[salt_pos].checksum[1]; - checksum[2] = krb5pa_bufs[salt_pos].checksum[2]; - checksum[3] = krb5pa_bufs[salt_pos].checksum[3]; - - u32 timestamp_ct[8]; - - timestamp_ct[0] = krb5pa_bufs[salt_pos].timestamp[0]; - timestamp_ct[1] = krb5pa_bufs[salt_pos].timestamp[1]; - timestamp_ct[2] = krb5pa_bufs[salt_pos].timestamp[2]; - timestamp_ct[3] = krb5pa_bufs[salt_pos].timestamp[3]; - timestamp_ct[4] = krb5pa_bufs[salt_pos].timestamp[4]; - timestamp_ct[5] = krb5pa_bufs[salt_pos].timestamp[5]; - timestamp_ct[6] = krb5pa_bufs[salt_pos].timestamp[6]; - timestamp_ct[7] = krb5pa_bufs[salt_pos].timestamp[7]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x digest[4]; - - kerb_prepare (w0, w1, pw_len, checksum, digest); - - u32 tmp[4]; - - #ifdef VECT_SIZE1 - - tmp[0] = digest[0]; - tmp[1] = digest[1]; - tmp[2] = digest[2]; - tmp[3] = digest[3]; - - if (decrypt_and_check (&rc4_keys[lid], tmp, timestamp_ct) == 1) - { - 
mark_hash_s0 (plains_buf, hashes_shown, digests_offset, gid, il_pos); - - d_return_buf[lid] = 1; - } - - #endif - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07500 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m08 (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07500 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ 
- - m07500 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07500 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m07500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const krb5pa_t *krb5pa_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07600_a0.cu b/nv/m07600_a0.cu deleted file mode 100644 index b0d7892..0000000 --- a/nv/m07600_a0.cu +++ /dev/null @@ -1,1180 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" 
-#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - 
salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - 
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, 
e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP 
(SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP 
(SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - 
wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); - wf_t = swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - 
w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = 
rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - 
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 
0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP 
(SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP 
(SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP 
(SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, 
const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len 
= pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t 
= swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - 
w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t 
= rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - 
w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 
255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); - wf_t = swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, 
d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, 
e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, 
b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = 
rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 
((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = 
rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, 
const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07600_a1.cu b/nv/m07600_a1.cu deleted file mode 100644 index 9fecf6a..0000000 --- a/nv/m07600_a1.cu +++ /dev/null @@ -1,1290 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - 
wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - 
- u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ 
w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - 
c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); 
- wf_t = swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, 
d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, 
d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, 
c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, 
b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 
1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ 
w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 
1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - 
u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x 
w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, 
w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - 
#undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | 
uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); - wf_t = swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - 
SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ 
wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ 
w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ 
w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, 
wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); 
- w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, 
w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - 
wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07600_a3.cu b/nv/m07600_a3.cu deleted file 
mode 100644 index 3d60d8e..0000000 --- a/nv/m07600_a3.cu +++ /dev/null @@ -1,1419 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m07600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier 
- */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, 
a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 
1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ 
w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 
1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = 
swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); - wf_t = swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP 
(SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - 
SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = 
rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = 
rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m07600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; 
- salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 total_len = (salt_len + 40) * 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ 
w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 
1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ 
wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; 
- d += SHA1M_D; - e += SHA1M_E; - - /** - * Prepend salt - */ - - u32x w0t[4]; - - w0t[0] = uint_to_hex_lower8 ((a >> 24) & 255) << 0 - | uint_to_hex_lower8 ((a >> 16) & 255) << 16; - w0t[1] = uint_to_hex_lower8 ((a >> 8) & 255) << 0 - | uint_to_hex_lower8 ((a >> 0) & 255) << 16; - w0t[2] = uint_to_hex_lower8 ((b >> 24) & 255) << 0 - | uint_to_hex_lower8 ((b >> 16) & 255) << 16; - w0t[3] = uint_to_hex_lower8 ((b >> 8) & 255) << 0 - | uint_to_hex_lower8 ((b >> 0) & 255) << 16; - - u32x w1t[4]; - - w1t[0] = uint_to_hex_lower8 ((c >> 24) & 255) << 0 - | uint_to_hex_lower8 ((c >> 16) & 255) << 16; - w1t[1] = uint_to_hex_lower8 ((c >> 8) & 255) << 0 - | uint_to_hex_lower8 ((c >> 0) & 255) << 16; - w1t[2] = uint_to_hex_lower8 ((d >> 24) & 255) << 0 - | uint_to_hex_lower8 ((d >> 16) & 255) << 16; - w1t[3] = uint_to_hex_lower8 ((d >> 8) & 255) << 0 - | uint_to_hex_lower8 ((d >> 0) & 255) << 16; - - u32x w2t[2]; - - w2t[0] = uint_to_hex_lower8 ((e >> 24) & 255) << 0 - | uint_to_hex_lower8 ((e >> 16) & 255) << 16; - w2t[1] = uint_to_hex_lower8 ((e >> 8) & 255) << 0 - | uint_to_hex_lower8 ((e >> 0) & 255) << 16; - - w0_t = salt_buf0[0]; - w1_t = salt_buf0[1]; - w2_t = salt_buf0[2]; - w3_t = salt_buf0[3]; - w4_t = salt_buf1[0]; - w5_t = salt_buf1[1]; - w6_t = salt_buf1[2]; - w7_t = salt_buf1[3]; - w8_t = w0t[0]; - w9_t = w0t[1]; - wa_t = w0t[2]; - wb_t = w0t[3]; - wc_t = w1t[0]; - wd_t = w1t[1]; - we_t = w1t[2]; - wf_t = w1t[3]; - - /** - * 2nd SHA1 - */ - - // 1st transform - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = swap_workaround (wa_t); - wb_t = swap_workaround (wb_t); - wc_t = swap_workaround (wc_t); - wd_t = swap_workaround (wd_t); - we_t = swap_workaround (we_t); - wf_t = 
swap_workaround (wf_t); - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, 
b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, 
b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, 
a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - u32x r_e = e; - - // 2nd transform - - w0_t = swap_workaround (w2t[0]); - w1_t = swap_workaround (w2t[1]); - w2_t = 0x80000000; - w3_t = 0; - w4_t = 0; - w5_t = 0; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = total_len; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, 
wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 
1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - e += r_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m07600m (w0, w1, w2, w3, pw_len, 
pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ 
- - m07600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 
0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m07600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - 
| c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m07600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] 
= c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m07600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 
pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m07600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m07700_a0.cu b/nv/m07700_a0.cu deleted file mode 100644 index 4ce2f9a..0000000 --- a/nv/m07700_a0.cu +++ /dev/null @@ -1,901 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPB_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) - -__device__ __constant__ u32 sapb_trans_tbl[256] = -{ - // first value hack for 0 byte as part of an optimization - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 
0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46, - 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a, - 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52, - 0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; - -__device__ __constant__ u32 bcodeArray[48] = -{ - 0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91, - 0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51, - 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d -}; - -__device__ static u32x sapb_trans (const u32x in) -{ - u32x out = 0; - - #ifdef VECT_SIZE1 - out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; - out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; - out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; - out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif - - return out; -} - 
-__device__ static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) -{ - t[ 0] = 0; - t[ 1] = 0; - t[ 2] = 0; - t[ 3] = 0; - t[ 4] = 0; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = 0; - t[15] = 0; - - u32 sum20 = ((a >> 24) & 3) - + ((a >> 16) & 3) - + ((a >> 8) & 3) - + ((a >> 0) & 3) - + ((b >> 8) & 3); - - sum20 |= 0x20; - - const u32 w[2] = { w0[0], w0[1] }; - - const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; - - u32 saved_key[4] = { a, b, c, d }; - - u32 i1 = 0; - u32 i2 = 0; - u32 i3 = 0; - - // we can assume this because the password must be at least 3 - // and the username must be at least 1 so we can save the if () - - u32 t0 = 0; - - if ((d >> 24) & 1) - { - t0 |= bcodeArray[47] << 0; - t0 |= (w[0] & 0xff) << 8; - t0 |= (s[0] & 0xff) << 16; - t0 |= bcodeArray[ 1] << 24; - - i1 = 1; - i2 = 5; - i3 = 1; - } - else - { - t0 |= (w[0] & 0xff) << 0; - t0 |= (s[0] & 0xff) << 8; - t0 |= bcodeArray[ 0] << 16; - - i1 = 1; - i2 = 4; - i3 = 1; - } - - t[0] = t0; - - // because the following code can increase i2 by a maximum of 5, - // there is an overflow potential of 4 before it comes to the next test for i2 >= sum20 - // we need to truncate in that case - - while ((i1 < pw_len) && (i3 < salt_len)) - { - if (GETCHAR (saved_key, 15 - i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while ((i1 < pw_len) || (i3 < salt_len)) - { - if (i1 < pw_len) // max 8 - { - if (GETCHAR (saved_key, 15 - 
i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - } - else if (i3 < salt_len) // max 12 - { - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - } - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while (i2 < sum20) - { - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - } - - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = 0; - pw_buf0[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = 
salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - if (out_len > 8) continue; // otherwise it overflows in waldorf function - - w0[0] = sapb_trans (w0[0]); - w0[1] = sapb_trans (w0[1]); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - u32x t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; - t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP 
(MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, 
MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP 
(MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, 
MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = 0; - pw_buf0[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - if (out_len > 8) continue; // otherwise it overflows in waldorf function - - w0[0] = sapb_trans (w0[0]); - w0[1] = sapb_trans (w0[1]); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - u32x 
t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; - t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); 
- MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 
9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x sum20 = walld0rf_magic (w0, out_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - 
MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; 
- const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07700_a1.cu b/nv/m07700_a1.cu deleted file mode 100644 index b0014e1..0000000 --- a/nv/m07700_a1.cu +++ /dev/null @@ -1,973 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPB_ - -#include "include/constants.h" -#include 
"include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) - -__device__ __constant__ u32 sapb_trans_tbl[256] = -{ - // first value hack for 0 byte as part of an optimization - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46, - 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a, - 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52, - 0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; - -__device__ __constant__ u32 bcodeArray[48] = -{ - 0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91, - 0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51, - 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d -}; - -__device__ static u32x sapb_trans (const u32x in) -{ - u32x out = 0; - - #ifdef VECT_SIZE1 - out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; - out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; - out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; - out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif - - return out; -} - -__device__ static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) -{ - t[ 0] = 0; - t[ 1] = 0; - t[ 2] = 0; - t[ 3] = 0; - t[ 4] = 0; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = 0; - t[15] = 0; - - u32 sum20 = ((a >> 24) & 3) - + ((a >> 16) & 3) - + ((a >> 8) & 3) - + ((a >> 0) & 3) - + ((b >> 8) & 3); - - sum20 |= 0x20; - - const u32 w[2] = { w0[0], w0[1] }; - - const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; - - u32 saved_key[4] = { a, b, c, d }; - - u32 i1 = 0; - u32 i2 = 0; - u32 i3 = 0; - - // we can assume this because the password must be at least 3 - // and the username must be at least 1 so we can save the if () - - u32 t0 = 0; - - if ((d >> 24) & 1) - { - t0 |= bcodeArray[47] << 0; - t0 |= (w[0] & 0xff) << 8; - t0 |= (s[0] & 0xff) << 16; - t0 |= bcodeArray[ 1] << 24; - - i1 = 1; - i2 = 5; - i3 = 1; 
- } - else - { - t0 |= (w[0] & 0xff) << 0; - t0 |= (s[0] & 0xff) << 8; - t0 |= bcodeArray[ 0] << 16; - - i1 = 1; - i2 = 4; - i3 = 1; - } - - t[0] = t0; - - // because the following code can increase i2 by a maximum of 5, - // there is an overflow potential of 4 before it comes to the next test for i2 >= sum20 - // we need to truncate in that case - - while ((i1 < pw_len) && (i3 < salt_len)) - { - if (GETCHAR (saved_key, 15 - i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while ((i1 < pw_len) || (i3 < salt_len)) - { - if (i1 < pw_len) // max 8 - { - if (GETCHAR (saved_key, 15 - i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - } - else if (i3 < salt_len) // max 12 - { - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - } - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while (i2 < sum20) - { - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - } - - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - /** - * digest - */ - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] 
= 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = sapb_trans (wordl0[0] | wordr0[0]); - w0[1] = sapb_trans (wordl0[1] | wordr0[1]); - w0[2] = 0; - w0[3] = 0; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; - t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, 
a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - 
MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, 
c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const 
comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - 
wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = sapb_trans (wordl0[0] | wordr0[0]); - w0[1] = sapb_trans (wordl0[1] | wordr0[1]); - w0[2] = 0; - w0[3] = 0; - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - 
s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; - t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, 
MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, 
d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, 
a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07700_a3.cu b/nv/m07700_a3.cu 
deleted file mode 100644 index 9bb8927..0000000 --- a/nv/m07700_a3.cu +++ /dev/null @@ -1,985 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPB_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) - -__device__ __constant__ u32 sapb_trans_tbl[256] = -{ - // first value hack for 0 byte as part of an optimization - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x3f, 0x40, 0x41, 0x50, 0x43, 0x44, 0x45, 0x4b, 0x47, 0x48, 0x4d, 0x4e, 0x54, 0x51, 0x53, 0x46, - 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x56, 0x55, 0x5c, 0x49, 0x5d, 0x4a, - 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x58, 0x5b, 0x59, 0xff, 0x52, - 0x4c, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x57, 0x5e, 0x5a, 0x4f, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; - -__device__ __constant__ u32 bcodeArray[48] = -{ - 0x14, 0x77, 0xf3, 0xd4, 0xbb, 0x71, 0x23, 0xd0, 0x03, 0xff, 0x47, 0x93, 0x55, 0xaa, 0x66, 0x91, - 0xf2, 0x88, 0x6b, 0x99, 0xbf, 0xcb, 0x32, 0x1a, 0x19, 0xd9, 0xa7, 0x82, 0x22, 0x49, 0xa2, 0x51, - 0xe2, 0xb7, 0x33, 0x71, 0x8b, 0x9f, 0x5d, 0x01, 0x44, 0x70, 0xae, 0x11, 0xef, 0x28, 0xf0, 0x0d -}; - -__device__ static u32x sapb_trans (const u32x in) -{ - u32x out = 0; - - #ifdef VECT_SIZE1 - out |= (sapb_trans_tbl[(in >> 0) & 0xff]) << 0; - out |= (sapb_trans_tbl[(in >> 8) & 0xff]) << 8; - out |= (sapb_trans_tbl[(in >> 16) & 0xff]) << 16; - out |= (sapb_trans_tbl[(in >> 24) & 0xff]) << 24; - #endif - - return out; -} - -__device__ static u32x walld0rf_magic (const u32x w0[4], const u32 pw_len, const u32x salt_buf0[4], const u32 salt_len, const u32x a, const u32x b, const u32x c, const u32x d, u32x t[16]) -{ - t[ 0] = 0; - t[ 1] = 0; - t[ 2] = 0; - t[ 3] = 0; - t[ 4] = 0; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = 0; - t[15] = 0; - - u32 sum20 = ((a >> 24) & 3) - + ((a >> 16) & 3) - + ((a >> 8) & 3) - + ((a >> 0) & 3) - + ((b >> 8) & 3); - - sum20 |= 0x20; - - const u32 w[2] = { w0[0], w0[1] }; - - const u32 s[3] = { salt_buf0[0], salt_buf0[1], salt_buf0[2] }; - - u32 saved_key[4] = { a, b, c, d }; - - u32 i1 = 0; - u32 i2 = 0; - u32 i3 = 0; - - // we can assume this because the password must be at least 3 - // and the username must 
be at least 1 so we can save the if () - - u32 t0 = 0; - - if ((d >> 24) & 1) - { - t0 |= bcodeArray[47] << 0; - t0 |= (w[0] & 0xff) << 8; - t0 |= (s[0] & 0xff) << 16; - t0 |= bcodeArray[ 1] << 24; - - i1 = 1; - i2 = 5; - i3 = 1; - } - else - { - t0 |= (w[0] & 0xff) << 0; - t0 |= (s[0] & 0xff) << 8; - t0 |= bcodeArray[ 0] << 16; - - i1 = 1; - i2 = 4; - i3 = 1; - } - - t[0] = t0; - - // because the following code can increase i2 by a maximum of 5, - // there is an overflow potential of 4 before it comes to the next test for i2 >= sum20 - // we need to truncate in that case - - while ((i1 < pw_len) && (i3 < salt_len)) - { - if (GETCHAR (saved_key, 15 - i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while ((i1 < pw_len) || (i3 < salt_len)) - { - if (i1 < pw_len) // max 8 - { - if (GETCHAR (saved_key, 15 - i1) & 1) - { - PUTCHAR (t, i2, bcodeArray[48 - 1 - i1]); - - i2++; - } - - PUTCHAR (t, i2, GETCHAR (w, i1)); - - i1++; - i2++; - } - else if (i3 < salt_len) // max 12 - { - PUTCHAR (t, i2, GETCHAR (s, i3)); - - i2++; - i3++; - } - - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - - if (i2 >= sum20) - { - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; - } - } - - while (i2 < sum20) - { - PUTCHAR (t, i2, bcodeArray[i2 - i1 - i3]); - - i2++; - i2++; - } - - PUTCHAR (t, sum20 + 0, 0x80); - PUTCHAR (t, sum20 + 1, 0); - PUTCHAR (t, sum20 + 2, 0); - PUTCHAR (t, sum20 + 3, 0); - - return sum20; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m07700m (u32x w0[4], u32x w1[4], u32x 
w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - w0[0] = sapb_trans (w0[0]); - w0[1] = sapb_trans (w0[1]); - - /** - * salt - */ - - u32 salt_buf0[3]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = sapb_trans (c_bfs[il_pos].i); - - w0[0] = w0l | w0r; - - u32x t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; 
- t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 
2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - 
const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 
5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m07700s 
(u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - w0[0] = sapb_trans (w0[0]); - w0[1] = sapb_trans (w0[1]); - - /** - * salt - */ - - u32 salt_buf0[3]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - - salt_buf0[0] = sapb_trans (salt_buf0[0]); - salt_buf0[1] = sapb_trans (salt_buf0[1]); - salt_buf0[2] = sapb_trans (salt_buf0[2]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = 0; - - u32 s1[4]; - - s1[0] = 0; - s1[1] = 0; - s1[2] = 0; - s1[3] = 0; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = sapb_trans (c_bfs[il_pos].i); - - w0[0] = w0l | w0r; - - u32x t[16]; - - t[ 0] = s0[0] | w0[0]; - t[ 1] = s0[1] | w0[1]; - t[ 2] = s0[2]; - t[ 3] = s0[3]; - t[ 4] = s1[0]; - t[ 5] = 0; - t[ 6] = 0; - t[ 7] = 0; - t[ 8] = 0; - t[ 9] = 0; - t[10] = 0; - t[11] = 0; - t[12] = 0; - t[13] = 0; - t[14] = pw_salt_len * 8; - t[15] = 0; - - append_0x80_4 (&t[0], &t[4], &t[8], &t[12], pw_salt_len); - - /** - * md5 - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], 
MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I 
, b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - const u32x sum20 = walld0rf_magic (w0, pw_len, salt_buf0, salt_len, a, b, c, d, t); - - t[14] = sum20 * 8; - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, t[ 0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 4], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 5], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[ 6], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[ 7], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[ 8], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[ 9], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[10], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[11], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t[12], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t[13], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t[14], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t[15], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t[ 1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 6], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[11], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 5], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[10], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[15], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 4], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[ 9], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[14], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 3], MD5C1a, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[ 8], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t[13], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t[ 2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t[ 7], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t[12], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t[ 5], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 8], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[11], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[14], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 4], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 7], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[10], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[13], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[ 0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[ 3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 6], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t[ 9], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t[12], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t[15], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t[ 2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t[ 0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 7], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[14], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 5], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[12], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[ 3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[10], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 8], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[15], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t[ 6], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[13], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t[ 4], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t[11], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, 
a, b, t[ 2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t[ 9], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - a ^= c; - b ^= d; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, 
loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07700m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, 
const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 
pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07700s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07800_a0.cu b/nv/m07800_a0.cu deleted file mode 100644 index 7699e39..0000000 --- a/nv/m07800_a0.cu +++ /dev/null @@ -1,764 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPG_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static u32 bytealign (u32 src0, u32 src1, u32 src2) -{ - return (u32) (((((u64)src0) << 32) | (u64)src1) 
>> ((src2 & 3)*8)); -} - -#if __CUDA_ARCH__ >= 350 -#define GETSHIFTEDINT(a,n) __funnelshift_r ((a)[((n)/4)+0], (a)[((n)/4)+1], (n & 3) * 8) -#elif __CUDA_ARCH__ >= 200 -#define GETSHIFTEDINT(a,n) __byte_perm ((a)[((n)/4)+0], (a)[((n)/4)+1], (0x76543210 >> ((n & 3) * 4)) & 0xffff) -#else -#define GETSHIFTEDINT(a,n) bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], n) -#endif - -#define SETSHIFTEDINT(a,n,v) \ -{ \ - const u32 s = ((n) & 3) * 8; \ - const u64 x = (u64) (v) << s; \ - (a)[((n)/4)+0] |= x; \ - (a)[((n)/4)+1] = x >> 32; \ -} - -__device__ __constant__ u32 theMagicArray[64] = -{ - 0x1451ac91,0x4354679f,0xe03be724,0xc27b7428,0xeb133386,0x5ccb4f5a,0x37730a08,0x2f1c5d0e, - 0xe5e68f33,0xddae9bf8,0x8d4bf216,0xdcd4e12c,0x9ddfcbb0,0x176d70d4,0x3f424df9,0x94111b9b, - 0x9bc15b9f,0x039d0506,0x8a135e9d,0xe86a9a1e,0x17147cd9,0xf62ac758,0x0a6399a1,0xc370fdd7, - 0x13745ef6,0x040bc903,0x26f79826,0x2593928a,0x230da2b0,0x6d7963ed,0x3cfa3213,0xa39a0235, - 0x0a8eddb3,0xc351bf24,0x9f55cd7c,0x4c94af37,0x82520829,0x374e3bb2,0x9107179f,0xcdfd3b11, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -__device__ static void swap_buffer (u32x final[16]) -{ - final[ 0] = swap_workaround (final[ 0]); - final[ 1] = swap_workaround (final[ 1]); - final[ 2] = swap_workaround (final[ 2]); - final[ 3] = swap_workaround (final[ 3]); - final[ 4] = swap_workaround (final[ 4]); - final[ 5] = swap_workaround (final[ 5]); - final[ 6] = swap_workaround (final[ 6]); - final[ 7] = swap_workaround (final[ 7]); - final[ 8] = swap_workaround (final[ 8]); - final[ 9] = swap_workaround (final[ 9]); - final[10] = swap_workaround (final[10]); - final[11] = swap_workaround (final[11]); - final[12] = swap_workaround (final[12]); - final[13] = swap_workaround (final[13]); - final[14] = swap_workaround (final[14]); - final[15] = swap_workaround (final[15]); -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x 
digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); 
- w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t 
= rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - 
w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = 
salt_buf[6]; - s1[3] = salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - offsetMagicArray 
+= ((digest[3] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = out_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { - swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - 
const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] 
= pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, out_len); - - const u32 pw_salt_len = out_len + salt_len; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - 
lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = out_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - 
final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { - swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07800_a1.cu b/nv/m07800_a1.cu deleted file mode 100644 index 137fd80..0000000 --- a/nv/m07800_a1.cu +++ /dev/null @@ -1,866 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPG_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static u32 bytealign (u32 src0, u32 src1, u32 src2) -{ - return (u32) (((((u64)src0) << 32) | (u64)src1) >> ((src2 & 3)*8)); -} - -#if __CUDA_ARCH__ >= 350 -#define GETSHIFTEDINT(a,n) __funnelshift_r ((a)[((n)/4)+0], (a)[((n)/4)+1], (n & 3) * 8) -#elif __CUDA_ARCH__ >= 200 -#define GETSHIFTEDINT(a,n) __byte_perm ((a)[((n)/4)+0], (a)[((n)/4)+1], (0x76543210 >> ((n & 3) * 4)) & 0xffff) -#else -#define GETSHIFTEDINT(a,n) bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], n) -#endif - -#define SETSHIFTEDINT(a,n,v) \ -{ \ - const u32 s = ((n) & 3) * 8; \ - const u64 x = (u64) (v) << s; \ - (a)[((n)/4)+0] |= x; \ - (a)[((n)/4)+1] = x >> 32; \ -} - -__device__ __constant__ u32 theMagicArray[64] = -{ - 0x1451ac91,0x4354679f,0xe03be724,0xc27b7428,0xeb133386,0x5ccb4f5a,0x37730a08,0x2f1c5d0e, - 0xe5e68f33,0xddae9bf8,0x8d4bf216,0xdcd4e12c,0x9ddfcbb0,0x176d70d4,0x3f424df9,0x94111b9b, - 0x9bc15b9f,0x039d0506,0x8a135e9d,0xe86a9a1e,0x17147cd9,0xf62ac758,0x0a6399a1,0xc370fdd7, - 
0x13745ef6,0x040bc903,0x26f79826,0x2593928a,0x230da2b0,0x6d7963ed,0x3cfa3213,0xa39a0235, - 0x0a8eddb3,0xc351bf24,0x9f55cd7c,0x4c94af37,0x82520829,0x374e3bb2,0x9107179f,0xcdfd3b11, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -__device__ static void swap_buffer (u32x final[16]) -{ - final[ 0] = swap_workaround (final[ 0]); - final[ 1] = swap_workaround (final[ 1]); - final[ 2] = swap_workaround (final[ 2]); - final[ 3] = swap_workaround (final[ 3]); - final[ 4] = swap_workaround (final[ 4]); - final[ 5] = swap_workaround (final[ 5]); - final[ 6] = swap_workaround (final[ 6]); - final[ 7] = swap_workaround (final[ 7]); - final[ 8] = swap_workaround (final[ 8]); - final[ 9] = swap_workaround (final[ 9]); - final[10] = swap_workaround (final[10]); - final[11] = swap_workaround (final[11]); - final[12] = swap_workaround (final[12]); - final[13] = swap_workaround (final[13]); - final[14] = swap_workaround (final[14]); - final[15] = swap_workaround (final[15]); -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP 
(SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ 
w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t 
^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ 
wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - 
wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = 
pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 
8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = pw_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { - swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off 
+ 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; 
- salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - /** - * append salt - */ - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x 
w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 24) & 
0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = pw_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { - swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; 
- - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07800_a3.cu b/nv/m07800_a3.cu deleted file mode 100644 index dd7a7f9..0000000 --- a/nv/m07800_a3.cu +++ /dev/null @@ -1,860 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SAPG_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - 
-#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static u32 bytealign (u32 src0, u32 src1, u32 src2) -{ - return (u32) (((((u64)src0) << 32) | (u64)src1) >> ((src2 & 3)*8)); -} - -#if __CUDA_ARCH__ >= 350 -#define GETSHIFTEDINT(a,n) __funnelshift_r ((a)[((n)/4)+0], (a)[((n)/4)+1], (n & 3) * 8) -#elif __CUDA_ARCH__ >= 200 -#define GETSHIFTEDINT(a,n) __byte_perm ((a)[((n)/4)+0], (a)[((n)/4)+1], (0x76543210 >> ((n & 3) * 4)) & 0xffff) -#else -#define GETSHIFTEDINT(a,n) bytealign ((a)[((n)/4)+1], (a)[((n)/4)+0], n) -#endif - -#define SETSHIFTEDINT(a,n,v) \ -{ \ - const u32 s = ((n) & 3) * 8; \ - const u64 x = (u64) (v) << s; \ - (a)[((n)/4)+0] |= x; \ - (a)[((n)/4)+1] = x >> 32; \ -} - -__device__ __constant__ u32 theMagicArray[64] = -{ - 0x1451ac91,0x4354679f,0xe03be724,0xc27b7428,0xeb133386,0x5ccb4f5a,0x37730a08,0x2f1c5d0e, - 0xe5e68f33,0xddae9bf8,0x8d4bf216,0xdcd4e12c,0x9ddfcbb0,0x176d70d4,0x3f424df9,0x94111b9b, - 0x9bc15b9f,0x039d0506,0x8a135e9d,0xe86a9a1e,0x17147cd9,0xf62ac758,0x0a6399a1,0xc370fdd7, - 0x13745ef6,0x040bc903,0x26f79826,0x2593928a,0x230da2b0,0x6d7963ed,0x3cfa3213,0xa39a0235, - 0x0a8eddb3,0xc351bf24,0x9f55cd7c,0x4c94af37,0x82520829,0x374e3bb2,0x9107179f,0xcdfd3b11, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -__device__ static void swap_buffer (u32x final[16]) -{ - final[ 0] = swap_workaround (final[ 0]); - final[ 1] = swap_workaround (final[ 1]); - final[ 2] = swap_workaround (final[ 2]); - final[ 3] = swap_workaround (final[ 3]); - final[ 4] = swap_workaround (final[ 4]); - final[ 5] = swap_workaround (final[ 5]); - final[ 6] = swap_workaround (final[ 6]); - final[ 
7] = swap_workaround (final[ 7]); - final[ 8] = swap_workaround (final[ 8]); - final[ 9] = swap_workaround (final[ 9]); - final[10] = swap_workaround (final[10]); - final[11] = swap_workaround (final[11]); - final[12] = swap_workaround (final[12]); - final[13] = swap_workaround (final[13]); - final[14] = swap_workaround (final[14]); - final[15] = swap_workaround (final[15]); -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); 
- w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef 
K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP 
(SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m07800m (u32x w0[4], u32x w1[4], 
u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - 
s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = swap_workaround (c_bfs[il_pos].i); - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - 
offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = pw_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { - swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 
= digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m07800s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - /** - * salt - */ - - u32 salt_buf[8]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - salt_buf[4] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf[5] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf[6] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf[7] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 s0[4]; - - s0[0] = salt_buf[0]; - s0[1] = salt_buf[1]; - s0[2] = salt_buf[2]; - s0[3] = salt_buf[3]; - - u32 s1[4]; - - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = 
salt_buf[7]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = swap_workaround (c_bfs[il_pos].i); - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x final[256]; - - final[ 0] = swap_workaround (w0[0] | s0[0]); - final[ 1] = swap_workaround (w0[1] | s0[1]); - final[ 2] = swap_workaround (w0[2] | s0[2]); - final[ 3] = swap_workaround (w0[3] | s0[3]); - final[ 4] = swap_workaround (w1[0] | s1[0]); - final[ 5] = swap_workaround (w1[1] | s1[1]); - final[ 6] = swap_workaround (w1[2] | s1[2]); - final[ 7] = swap_workaround (w1[3] | s1[3]); - final[ 8] = swap_workaround (w2[0] | s2[0]); - final[ 9] = swap_workaround (w2[1] | s2[1]); - final[10] = swap_workaround (w2[2] | s2[2]); - final[11] = swap_workaround (w2[3] | s2[3]); - final[12] = swap_workaround (w3[0] | s3[0]); - final[13] = swap_workaround (w3[1] | s3[1]); - final[14] = 0; - final[15] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (&final[0], &final[4], &final[8], &final[12], digest); - - // prepare magic array range - - u32x lengthMagicArray = 0x20; - u32x offsetMagicArray = 0; - - lengthMagicArray += ((digest[0] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[0] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[1] 
>> 24) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 16) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 8) & 0xff) % 6; - lengthMagicArray += ((digest[1] >> 0) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 24) & 0xff) % 6; - lengthMagicArray += ((digest[2] >> 16) & 0xff) % 6; - offsetMagicArray += ((digest[2] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[2] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[3] >> 0) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 24) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 16) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 8) & 0xff) % 8; - offsetMagicArray += ((digest[4] >> 0) & 0xff) % 8; - - // final - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - #pragma unroll 64 - for (int i = 0; i < 64; i++) final[i] = 0; - - final[0] = w0[0]; - final[1] = w0[1]; - final[2] = w0[2]; - final[3] = w0[3]; - final[4] = w1[0]; - final[5] = w1[1]; - final[6] = w1[2]; - final[7] = w1[3]; - - u32 final_len = pw_len; - - int i; - - // append MagicArray - - for (i = 0; i < lengthMagicArray - 4; i += 4) - { - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i); - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - const u32 mask = 0xffffffff >> (((i - lengthMagicArray) & 3) * 8); - - const u32 tmp = GETSHIFTEDINT (theMagicArray, offsetMagicArray + i) & mask; - - SETSHIFTEDINT (final, final_len + i, tmp); - - final_len += lengthMagicArray; - - // append Salt - - for (i = 0; i < salt_len + 1; i += 4) // +1 for the 0x80 - { - const u32 tmp = salt_buf[i / 4]; // attention, int[] not char[] - - SETSHIFTEDINT (final, final_len + i, tmp); - } - - final_len += salt_len; - - // calculate - - int left; - int off; - - for (left = final_len, off = 0; left >= 56; left -= 64, off += 16) - { 
- swap_buffer (&final[off]); - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - } - - swap_buffer (&final[off]); - - final[off + 14] = 0; - final[off + 15] = final_len * 8; - - sha1_transform (&final[off + 0], &final[off + 4], &final[off + 8], &final[off + 12], digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - 
w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - 
- const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m07900.cu b/nv/m07900.cu deleted file mode 100644 index 5bff78d..0000000 --- a/nv/m07900.cu +++ /dev/null @@ -1,387 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ u64 k[80] = -{ - 
SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, 
SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07900_init (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, drupal7_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - /** - * init - */ - - u32 block_len = 8 + pw_len; - - u64 w[16]; - - w[ 0] = ((u64) swap_workaround (salt_buf[0])) << 32 | (u64) swap_workaround (salt_buf[1]); - w[ 1] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]); - w[ 2] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]); - w[ 3] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[ 4] = ((u64) swap_workaround (w1[2])) << 32 | (u64) 
swap_workaround (w1[3]); - w[ 5] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[ 6] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[ 7] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]); - w[ 8] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]); - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = block_len * 8; - - /** - * init - */ - - u64 digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; - tmps[gid].digest_buf[5] = digest[5]; - tmps[gid].digest_buf[6] = digest[6]; - tmps[gid].digest_buf[7] = digest[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07900_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, drupal7_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x 
w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - /** - * digest - */ - - u64 digest[8]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - digest[4] = tmps[gid].digest_buf[4]; - digest[5] = tmps[gid].digest_buf[5]; - digest[6] = tmps[gid].digest_buf[6]; - digest[7] = tmps[gid].digest_buf[7]; - - /** - * loop - */ - - u32 block_len = (64 + pw_len); - - u64 w[16]; - - w[ 0] = 0; - w[ 1] = 0; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]); - w[ 9] = ((u64) swap_workaround (w0[2])) << 32 | (u64) swap_workaround (w0[3]); - w[10] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[11] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]); - w[12] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[13] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[14] = 0; - w[15] = block_len * 8; - - /** - * init - */ - - for (u32 i = 0; i < loop_cnt; i++) - { - w[ 0] = digest[0]; - w[ 1] = digest[1]; - w[ 2] = digest[2]; - w[ 3] = digest[3]; - w[ 4] = digest[4]; - w[ 5] = digest[5]; - w[ 6] = digest[6]; - w[ 7] = digest[7]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = 
SHA512M_H; - - sha512_transform (w, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; - tmps[gid].digest_buf[5] = digest[5]; - tmps[gid].digest_buf[6] = digest[6]; - tmps[gid].digest_buf[7] = digest[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m07900_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, drupal7_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = l32_from_64 (tmps[gid].digest_buf[0]); - const u32x r1 = h32_from_64 (tmps[gid].digest_buf[0]); - const u32x r2 = l32_from_64 (tmps[gid].digest_buf[1]); - const u32x r3 = h32_from_64 (tmps[gid].digest_buf[1]); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m08000_a0.cu b/nv/m08000_a0.cu deleted file mode 100644 index e6c81d0..0000000 --- a/nv/m08000_a0.cu +++ /dev/null @@ -1,515 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif 
- -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha256_transform (u32x digest[8], const u32x w[16]) -{ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, 
wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP 
(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = 
SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, 
SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - - make_unicode (w1, w2_t, w3_t); - - u32x w_t[16]; - - w_t[ 0] = swap_workaround (w0_t[0]); - w_t[ 1] = swap_workaround (w0_t[1]); - w_t[ 2] = swap_workaround (w0_t[2]); - w_t[ 3] = swap_workaround (w0_t[3]); - w_t[ 4] = swap_workaround (w1_t[0]); - w_t[ 5] = swap_workaround (w1_t[1]); - w_t[ 6] = swap_workaround (w1_t[2]); - w_t[ 7] = swap_workaround (w1_t[3]); - w_t[ 8] = swap_workaround (w2_t[0]); - w_t[ 9] = swap_workaround (w2_t[1]); - w_t[10] = swap_workaround (w2_t[2]); - w_t[11] = swap_workaround (w2_t[3]); - w_t[12] = swap_workaround (w3_t[0]); - w_t[13] = swap_workaround 
(w3_t[1]); - w_t[14] = swap_workaround (w3_t[2]); - w_t[15] = swap_workaround (w3_t[3]); - - w_t[ 0] = w_t[ 0] >> 8; - w_t[ 1] = w_t[ 1] >> 8; - w_t[ 2] = w_t[ 2] >> 8; - w_t[ 3] = w_t[ 3] >> 8; - w_t[ 4] = w_t[ 4] >> 8; - w_t[ 5] = w_t[ 5] >> 8; - w_t[ 6] = w_t[ 6] >> 8; - w_t[ 7] = w_t[ 7] >> 8; - w_t[ 8] = w_t[ 8] >> 8; - w_t[ 9] = w_t[ 9] >> 8; - w_t[10] = w_t[10] >> 8; - w_t[11] = w_t[11] >> 8; - w_t[12] = w_t[12] >> 8; - w_t[13] = w_t[13] >> 8; - w_t[14] = w_t[14] >> 8; - w_t[15] = w_t[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const 
u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - - make_unicode (w1, w2_t, w3_t); - - u32x w_t[16]; - - w_t[ 0] = swap_workaround (w0_t[0]); - w_t[ 1] = swap_workaround (w0_t[1]); - 
w_t[ 2] = swap_workaround (w0_t[2]); - w_t[ 3] = swap_workaround (w0_t[3]); - w_t[ 4] = swap_workaround (w1_t[0]); - w_t[ 5] = swap_workaround (w1_t[1]); - w_t[ 6] = swap_workaround (w1_t[2]); - w_t[ 7] = swap_workaround (w1_t[3]); - w_t[ 8] = swap_workaround (w2_t[0]); - w_t[ 9] = swap_workaround (w2_t[1]); - w_t[10] = swap_workaround (w2_t[2]); - w_t[11] = swap_workaround (w2_t[3]); - w_t[12] = swap_workaround (w3_t[0]); - w_t[13] = swap_workaround (w3_t[1]); - w_t[14] = swap_workaround (w3_t[2]); - w_t[15] = swap_workaround (w3_t[3]); - - w_t[ 0] = w_t[ 0] >> 8; - w_t[ 1] = w_t[ 1] >> 8; - w_t[ 2] = w_t[ 2] >> 8; - w_t[ 3] = w_t[ 3] >> 8; - w_t[ 4] = w_t[ 4] >> 8; - w_t[ 5] = w_t[ 5] >> 8; - w_t[ 6] = w_t[ 6] >> 8; - w_t[ 7] = w_t[ 7] >> 8; - w_t[ 8] = w_t[ 8] >> 8; - w_t[ 9] = w_t[ 9] >> 8; - w_t[10] = w_t[10] >> 8; - w_t[11] = w_t[11] >> 8; - w_t[12] = w_t[12] >> 8; - w_t[13] = w_t[13] >> 8; - w_t[14] = w_t[14] >> 8; - w_t[15] = w_t[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - 
sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08000_a1.cu b/nv/m08000_a1.cu deleted file mode 100644 index cc269e1..0000000 --- a/nv/m08000_a1.cu +++ /dev/null @@ 
-1,601 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha256_transform (u32x digest[8], const u32x w[16]) -{ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, 
w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + 
SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, 
h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x 
w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - - make_unicode (w1, w2_t, w3_t); - - u32x w_t[16]; - - w_t[ 0] = swap_workaround (w0_t[0]); - w_t[ 1] = swap_workaround (w0_t[1]); - w_t[ 2] = swap_workaround (w0_t[2]); - w_t[ 3] = swap_workaround (w0_t[3]); - w_t[ 4] = swap_workaround (w1_t[0]); - w_t[ 5] = swap_workaround (w1_t[1]); - w_t[ 6] = swap_workaround (w1_t[2]); - w_t[ 7] = swap_workaround (w1_t[3]); - w_t[ 8] = swap_workaround (w2_t[0]); - w_t[ 9] = swap_workaround (w2_t[1]); - w_t[10] = swap_workaround (w2_t[2]); - w_t[11] = swap_workaround (w2_t[3]); - w_t[12] = swap_workaround (w3_t[0]); - w_t[13] = swap_workaround (w3_t[1]); - w_t[14] = swap_workaround (w3_t[2]); - w_t[15] = swap_workaround (w3_t[3]); - - w_t[ 0] = w_t[ 0] >> 8; - w_t[ 1] = w_t[ 1] >> 8; - w_t[ 2] = w_t[ 2] >> 8; - w_t[ 3] = w_t[ 3] >> 8; - w_t[ 4] = w_t[ 4] >> 8; - w_t[ 5] = w_t[ 5] >> 8; - w_t[ 6] = w_t[ 6] >> 8; - w_t[ 7] = w_t[ 7] >> 8; - w_t[ 8] = w_t[ 8] >> 8; - w_t[ 9] = w_t[ 9] >> 8; - w_t[10] = w_t[10] >> 8; - w_t[11] = w_t[11] >> 8; - w_t[12] = w_t[12] >> 8; - w_t[13] = w_t[13] >> 8; - w_t[14] = w_t[14] >> 8; - w_t[15] = w_t[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; 
- - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 
0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | 
wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - - make_unicode (w1, w2_t, w3_t); - - u32x w_t[16]; - - w_t[ 0] = swap_workaround (w0_t[0]); - w_t[ 1] = swap_workaround (w0_t[1]); - w_t[ 2] = swap_workaround (w0_t[2]); - w_t[ 3] = swap_workaround (w0_t[3]); - w_t[ 4] = swap_workaround (w1_t[0]); - w_t[ 5] = swap_workaround (w1_t[1]); - w_t[ 6] = swap_workaround (w1_t[2]); - w_t[ 7] = swap_workaround (w1_t[3]); - w_t[ 8] = swap_workaround (w2_t[0]); - w_t[ 9] = swap_workaround (w2_t[1]); - w_t[10] = swap_workaround (w2_t[2]); - w_t[11] = swap_workaround (w2_t[3]); - w_t[12] = swap_workaround (w3_t[0]); - w_t[13] = swap_workaround (w3_t[1]); - w_t[14] = swap_workaround (w3_t[2]); - w_t[15] = swap_workaround (w3_t[3]); - - w_t[ 0] = w_t[ 0] >> 8; - w_t[ 1] = w_t[ 1] >> 8; - w_t[ 2] = w_t[ 2] >> 8; - w_t[ 3] = w_t[ 3] >> 8; - w_t[ 4] = w_t[ 4] >> 8; - w_t[ 5] = w_t[ 5] >> 8; - w_t[ 6] = w_t[ 6] >> 8; - w_t[ 7] = w_t[ 7] >> 8; - w_t[ 8] = w_t[ 8] >> 8; - w_t[ 9] = w_t[ 9] >> 8; - w_t[10] = w_t[10] >> 8; - w_t[11] = w_t[11] >> 8; - w_t[12] = w_t[12] >> 8; - w_t[13] = w_t[13] >> 8; - w_t[14] = w_t[14] >> 8; - w_t[15] = w_t[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - 
sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const 
u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08000_a3.cu b/nv/m08000_a3.cu deleted file mode 100644 index 89bc4b0..0000000 --- a/nv/m08000_a3.cu +++ /dev/null @@ -1,596 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -__device__ static void sha256_transform (u32x digest[8], const u32x w[16]) -{ - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, 
c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + 
SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, 
c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; 
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m08000m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, 
const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0 >> 8; - w_t[ 1] = w[ 1] >> 8; - w_t[ 2] = w[ 2] >> 8; - w_t[ 3] = w[ 3] >> 8; - w_t[ 4] = w[ 4] >> 8; - w_t[ 5] = w[ 5] >> 8; - w_t[ 6] = w[ 6] >> 8; - w_t[ 7] = w[ 7] >> 8; - w_t[ 8] = w[ 8] >> 8; - w_t[ 9] = w[ 9] >> 8; - w_t[10] = w[10] >> 8; - w_t[11] = w[11] >> 8; - w_t[12] = w[12] >> 8; - w_t[13] = w[13] >> 8; - w_t[14] = w[14] >> 8; - w_t[15] = w[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 
0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08000s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const 
u32 salt_buf0 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - const u32 salt_buf1 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - const u32 salt_buf2 = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); // 0x80 - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0 >> 8; - w_t[ 1] = w[ 1] >> 8; - w_t[ 2] = w[ 2] >> 8; - w_t[ 3] = w[ 3] >> 8; - w_t[ 4] = w[ 4] >> 8; - w_t[ 5] = w[ 5] >> 8; - w_t[ 6] = w[ 6] >> 8; - w_t[ 7] = w[ 7] >> 8; - w_t[ 8] = w[ 8] >> 8; - w_t[ 9] = w[ 9] >> 8; - w_t[10] = w[10] >> 8; - w_t[11] = w[11] >> 8; - w_t[12] = w[12] >> 8; - w_t[13] = w[13] >> 8; - w_t[14] = w[14] >> 8; - w_t[15] = w[15] >> 8; - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (digest, w_t); // 0 - 64 - - w_t[ 0] = 0; - w_t[ 1] = 0; - w_t[ 2] = 0; - w_t[ 3] = 0; - w_t[ 4] = 0; - w_t[ 5] = 0; - w_t[ 6] = 0; - w_t[ 7] = 0; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - sha256_transform (digest, w_t); // 64 - 128 - sha256_transform (digest, w_t); // 128 - 192 - sha256_transform (digest, w_t); // 192 - 256 - sha256_transform (digest, w_t); // 256 - 320 - sha256_transform (digest, w_t); // 320 - 384 - sha256_transform (digest, w_t); // 384 - 448 - - w_t[15] = 0 | salt_buf0 >> 16; - - sha256_transform (digest, w_t); // 448 - 512 - - w_t[ 0] = salt_buf0 << 16 | salt_buf1 >> 16; - w_t[ 1] = salt_buf1 << 16 | salt_buf2 >> 16; - w_t[ 2] = salt_buf2 << 16 | 0; - w_t[15] = (510 + 8) * 8; - - sha256_transform (digest, w_t); // 512 - 576 - - const u32x r0 = digest[3]; - const u32x r1 = digest[7]; - 
const u32x r2 = digest[2]; - const u32x r3 = digest[6]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 
pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08100_a0.cu b/nv/m08100_a0.cu deleted file mode 100644 index 42c0320..0000000 --- a/nv/m08100_a0.cu +++ /dev/null @@ -1,547 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; 
- w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (out_salt_len + 1) * 8; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len + 1); - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - 
w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP 
(SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ 
w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, 
w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - 
pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - /** - * prepend salt - */ - - const u32 out_salt_len = out_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; - w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (out_salt_len + 1) * 8; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, out_salt_len + 1); - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - 
w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, 
w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ 
w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = 
rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08100_a1.cu b/nv/m08100_a1.cu deleted file mode 100644 index 54da4f6..0000000 --- a/nv/m08100_a1.cu +++ /dev/null @@ -1,641 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, 
wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; - w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (pw_salt_len + 1) * 8; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len + 1); - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); 
- SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 
((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, 
d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ 
w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = 
e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const 
u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * loop - */ - - for (u32 il_pos 
= 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * prepend salt - */ - - const u32 pw_salt_len = pw_len + salt_len; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; - w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (pw_salt_len + 1) * 8; - - append_0x80_4 (w0_t, w1_t, w2_t, w3_t, pw_salt_len + 1); - - /** - * sha1 - */ - - w0_t[0] = swap_workaround (w0_t[0]); - w0_t[1] = swap_workaround (w0_t[1]); - w0_t[2] = 
swap_workaround (w0_t[2]); - w0_t[3] = swap_workaround (w0_t[3]); - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - //w3_t[2] = swap_workaround (w3_t[2]); - //w3_t[3] = swap_workaround (w3_t[3]); - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); 
SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); 
SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = 
rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, 
c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08100_a3.cu b/nv/m08100_a3.cu deleted file mode 100644 index ca450ba..0000000 --- a/nv/m08100_a3.cu +++ /dev/null @@ -1,855 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m08100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const 
u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; - w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (pw_salt_len + 1) * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP 
(SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ 
w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - 
w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * reverse - */ - - const u32 e_rev = rotl32 (search[1], 2u); - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = w0[0]; - w0_t[3] = w0[1]; - w1_t[0] = w0[2]; - w1_t[1] = w0[3]; - w1_t[2] = w1[0]; - w1_t[3] = w1[1]; - w2_t[0] = w1[2]; - w2_t[1] = w1[3]; - w2_t[2] = w2[0]; - w2_t[3] = w2[1]; - w3_t[0] = w2[2]; - w3_t[1] = w2[3]; - w3_t[2] = 0; - w3_t[3] = (pw_salt_len + 1) * 8; - - /** - * sha1 - */ - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t[0]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[1]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[2]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[3]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w1_t[0]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, 
w1_t[1]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t[2]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t[3]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t[0]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w2_t[1]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w2_t[2]); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w2_t[3]); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w3_t[0]); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t[1]); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t[2]); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w0_t[3]); - - #undef K - #define K SHA1C01 - - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[0]); - w3_t[1] = rotl32 
((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[3]); - - #undef K - #define K SHA1C02 - - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F2o, e, 
a, b, c, d, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w2_t[3]); - - #undef K - #define K SHA1C03 - - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t[0]); - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[3]); - w0_t[0] = rotl32 ((w3_t[1] ^ w2_t[0] ^ 
w0_t[2] ^ w0_t[0]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t[0]); - w0_t[1] = rotl32 ((w3_t[2] ^ w2_t[1] ^ w0_t[3] ^ w0_t[1]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w0_t[1]); - w0_t[2] = rotl32 ((w3_t[3] ^ w2_t[2] ^ w1_t[0] ^ w0_t[2]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w0_t[2]); - w0_t[3] = rotl32 ((w0_t[0] ^ w2_t[3] ^ w1_t[1] ^ w0_t[3]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t[3]); - w1_t[0] = rotl32 ((w0_t[1] ^ w3_t[0] ^ w1_t[2] ^ w1_t[0]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t[0]); - w1_t[1] = rotl32 ((w0_t[2] ^ w3_t[1] ^ w1_t[3] ^ w1_t[1]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w1_t[1]); - w1_t[2] = rotl32 ((w0_t[3] ^ w3_t[2] ^ w2_t[0] ^ w1_t[2]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t[2]); - w1_t[3] = rotl32 ((w1_t[0] ^ w3_t[3] ^ w2_t[1] ^ w1_t[3]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w1_t[3]); - w2_t[0] = rotl32 ((w1_t[1] ^ w0_t[0] ^ w2_t[2] ^ w2_t[0]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w2_t[0]); - w2_t[1] = rotl32 ((w1_t[2] ^ w0_t[1] ^ w2_t[3] ^ w2_t[1]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w2_t[1]); - w2_t[2] = rotl32 ((w1_t[3] ^ w0_t[2] ^ w3_t[0] ^ w2_t[2]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t[2]); - w2_t[3] = rotl32 ((w2_t[0] ^ w0_t[3] ^ w3_t[1] ^ w2_t[3]), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w2_t[3]); - w3_t[0] = rotl32 ((w2_t[1] ^ w1_t[0] ^ w3_t[2] ^ w3_t[0]), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w3_t[0]); - - if (e != e_rev) continue; - - w3_t[1] = rotl32 ((w2_t[2] ^ w1_t[1] ^ w3_t[3] ^ w3_t[1]), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t[1]); - w3_t[2] = rotl32 ((w2_t[3] ^ w1_t[2] ^ w0_t[0] ^ w3_t[2]), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w3_t[2]); - w3_t[3] = rotl32 ((w3_t[0] ^ w1_t[3] ^ w0_t[1] ^ w3_t[3]), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w3_t[3]); - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - append_0x80_2 (w0, w1, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - - /** - * main - */ - - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, 
loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - append_0x80_3 (w0, w1, w2, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = 
swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - - /** - * main - */ - - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - 
/** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = 0; - - /** - * main - */ - - m08100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t 
*plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - append_0x80_2 (w0, w1, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - - /** - * main - */ - - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, 
const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - append_0x80_3 (w0, w1, w2, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - - /** - * main - */ - - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, 
bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround 
(w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = 0; - - append_0x80_4 (w0, w1, w2, w3, pw_len + 1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = 0; - - /** - * main - */ - - m08100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08200.cu b/nv/m08200.cu deleted file mode 100644 index 2ce2b77..0000000 --- a/nv/m08200.cu +++ /dev/null @@ -1,793 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _CLOUDKEY_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - 
SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, 
f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, 
SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + 
w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, 
SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP 
(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - 
w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] 
= opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_sha512_run (const u64 w1[16], const u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_sha512_init (u64 w[16], u64 ipad[8], u64 opad[8]) -{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 
4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; - w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08200_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const cloudkey_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u64 data_buf[16]; - - data_buf[ 0] = ((u64) swap_workaround (salt_buf0[ 0])) << 32 | (u64) swap_workaround (salt_buf0[ 1]); - data_buf[ 1] = ((u64) swap_workaround (salt_buf0[ 2])) << 32 | (u64) swap_workaround (salt_buf0[ 3]); - data_buf[ 2] = ((u64) swap_workaround (salt_buf1[ 0])) << 32 | (u64) swap_workaround (salt_buf1[ 1]); - data_buf[ 3] = 0; - data_buf[ 4] = 0; - data_buf[ 5] = 0; - data_buf[ 6] = 0; - data_buf[ 7] = 0; - data_buf[ 8] = 0; - data_buf[ 9] = 0; - data_buf[10] = 0; - data_buf[11] = 0; - data_buf[12] = 0; - data_buf[13] = 0; - data_buf[14] = 0; - data_buf[15] = (128 + salt_len + 4) * 8; - - u64 w[16]; - - w[ 0] = ((u64) swap_workaround (w0[0])) << 32 | (u64) swap_workaround (w0[1]); - w[ 1] = ((u64) swap_workaround (w0[2])) << 32 | (u64) 
swap_workaround (w0[3]); - w[ 2] = ((u64) swap_workaround (w1[0])) << 32 | (u64) swap_workaround (w1[1]); - w[ 3] = ((u64) swap_workaround (w1[2])) << 32 | (u64) swap_workaround (w1[3]); - w[ 4] = ((u64) swap_workaround (w2[0])) << 32 | (u64) swap_workaround (w2[1]); - w[ 5] = ((u64) swap_workaround (w2[2])) << 32 | (u64) swap_workaround (w2[3]); - w[ 6] = ((u64) swap_workaround (w3[0])) << 32 | (u64) swap_workaround (w3[1]); - w[ 7] = ((u64) swap_workaround (w3[2])) << 32 | (u64) swap_workaround (w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_sha512_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - u64 dgst[8]; - - hmac_sha512_run (data_buf, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08200_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const cloudkey_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < 8; i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - 
{ - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_sha512_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08200_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha512_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const cloudkey_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; 
- - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = tmps[gid].out[4] >> 32; - w0[1] = tmps[gid].out[4] & 0xffffffff; - w0[2] = tmps[gid].out[5] >> 32; - w0[3] = tmps[gid].out[5] & 0xffffffff; - w1[0] = tmps[gid].out[6] >> 32; - w1[1] = tmps[gid].out[6] & 0xffffffff; - w1[2] = tmps[gid].out[7] >> 32; - w1[3] = tmps[gid].out[7] & 0xffffffff; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 ipad[8]; - u32 opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - int size = esalt_bufs[salt_pos].data_len; - - int left; - int off; - - for (left = size, off = 0; left >= 56; left -= 64, off += 16) - { - w0[0] = esalt_bufs[salt_pos].data_buf[off + 0]; - w0[1] = esalt_bufs[salt_pos].data_buf[off + 1]; - w0[2] = esalt_bufs[salt_pos].data_buf[off + 2]; - w0[3] = esalt_bufs[salt_pos].data_buf[off + 3]; - w1[0] = esalt_bufs[salt_pos].data_buf[off + 4]; - w1[1] = esalt_bufs[salt_pos].data_buf[off + 5]; - w1[2] = esalt_bufs[salt_pos].data_buf[off + 6]; - w1[3] = esalt_bufs[salt_pos].data_buf[off + 7]; - w2[0] = esalt_bufs[salt_pos].data_buf[off + 8]; - w2[1] = esalt_bufs[salt_pos].data_buf[off + 9]; - w2[2] = esalt_bufs[salt_pos].data_buf[off + 10]; - w2[3] = esalt_bufs[salt_pos].data_buf[off + 11]; - w3[0] = esalt_bufs[salt_pos].data_buf[off + 12]; - w3[1] = esalt_bufs[salt_pos].data_buf[off + 13]; - w3[2] = esalt_bufs[salt_pos].data_buf[off + 14]; - w3[3] = esalt_bufs[salt_pos].data_buf[off + 15]; - - sha256_transform (w0, w1, w2, w3, ipad); - } - - w0[0] = esalt_bufs[salt_pos].data_buf[off + 0]; - w0[1] = esalt_bufs[salt_pos].data_buf[off + 1]; - w0[2] = esalt_bufs[salt_pos].data_buf[off + 2]; - w0[3] = esalt_bufs[salt_pos].data_buf[off + 3]; - w1[0] = esalt_bufs[salt_pos].data_buf[off + 4]; - w1[1] = esalt_bufs[salt_pos].data_buf[off + 5]; - w1[2] = esalt_bufs[salt_pos].data_buf[off + 6]; - w1[3] = esalt_bufs[salt_pos].data_buf[off 
+ 7]; - w2[0] = esalt_bufs[salt_pos].data_buf[off + 8]; - w2[1] = esalt_bufs[salt_pos].data_buf[off + 9]; - w2[2] = esalt_bufs[salt_pos].data_buf[off + 10]; - w2[3] = esalt_bufs[salt_pos].data_buf[off + 11]; - w3[0] = esalt_bufs[salt_pos].data_buf[off + 12]; - w3[1] = esalt_bufs[salt_pos].data_buf[off + 13]; - w3[2] = 0; - w3[3] = (64 + size) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); - - const u32x r0 = digest[0]; - const u32x r1 = digest[1]; - const u32x r2 = digest[2]; - const u32x r3 = digest[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m08300_a0.cu b/nv/m08300_a0.cu deleted file mode 100644 index b3d98b8..0000000 --- a/nv/m08300_a0.cu +++ /dev/null @@ -1,772 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - 
u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, 
B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, 
B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, 
A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - 
u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + out_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = domain_buf0[1]; - d0[2] = domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + out_len); - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); 
- w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) 
- { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + out_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = domain_buf0[1]; - d0[2] = domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + out_len); - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] 
= swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); - w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + out_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void 
__launch_bounds__ (256, 1) m08300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08300_a1.cu b/nv/m08300_a1.cu deleted file mode 100644 index 1f3bb63..0000000 --- a/nv/m08300_a1.cu +++ /dev/null @@ -1,866 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define 
VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP 
(SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP 
(SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - 
digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 
salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = 
wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = domain_buf0[1]; - d0[2] = domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + pw_len); - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | 
s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); - w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = 
salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - 
w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * salt - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = domain_buf0[1]; - d0[2] = domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + pw_len); - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); - w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | 
d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08300_a3.cu b/nv/m08300_a3.cu deleted file mode 100644 index 13694ca..0000000 --- a/nv/m08300_a3.cu +++ /dev/null @@ -1,954 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x 
w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, 
A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, 
wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, 
E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void m08300m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, 
u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * base - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = domain_buf0[1]; - d0[2] = 
domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + pw_len); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); - w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = 
SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08300s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - 
- const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_iter = salt_bufs[salt_pos].salt_iter; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 domain_buf0[4]; - - domain_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[ 0]; - domain_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[ 1]; - domain_buf0[2] = salt_bufs[salt_pos].salt_buf_pc[ 2]; - domain_buf0[3] = salt_bufs[salt_pos].salt_buf_pc[ 3]; - - u32 domain_buf1[4]; - - domain_buf1[0] = salt_bufs[salt_pos].salt_buf_pc[ 4]; - domain_buf1[1] = salt_bufs[salt_pos].salt_buf_pc[ 5]; - domain_buf1[2] = salt_bufs[salt_pos].salt_buf_pc[ 6]; - domain_buf1[3] = 0; - - const u32 domain_len = salt_bufs[salt_pos].salt_buf_pc[ 7]; - - /** - * base - */ - - u32 s0[4]; - - s0[0] = salt_buf0[0]; - s0[1] = salt_buf0[1]; - s0[2] = salt_buf0[2]; - s0[3] = salt_buf0[3]; - - u32 s1[4]; - - s1[0] = salt_buf1[0]; - s1[1] = salt_buf1[1]; - s1[2] = salt_buf1[2]; - s1[3] = salt_buf1[3]; - - u32 s2[4]; - - s2[0] = 0; - s2[1] = 0; - s2[2] = 0; - s2[3] = 0; - - u32 s3[4]; - - s3[0] = 0; - s3[1] = 0; - s3[2] = 0; - s3[3] = 0; - - switch_buffer_by_offset (s0, s1, s2, s3, 1 + pw_len + domain_len + 1); - - u32 d0[4]; - - d0[0] = domain_buf0[0]; - d0[1] = 
domain_buf0[1]; - d0[2] = domain_buf0[2]; - d0[3] = domain_buf0[3]; - - u32 d1[4]; - - d1[0] = domain_buf1[0]; - d1[1] = domain_buf1[1]; - d1[2] = domain_buf1[2]; - d1[3] = 0; - - u32 d2[4]; - - d2[0] = 0; - d2[1] = 0; - d2[2] = 0; - d2[3] = 0; - - u32 d3[4]; - - d3[0] = 0; - d3[1] = 0; - d3[2] = 0; - d3[3] = 0; - - switch_buffer_by_offset (d0, d1, d2, d3, 1 + pw_len); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = w3[2]; - w3_t[3] = w3[3]; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 1); - - w0_t[0] |= pw_len & 0xff; - - /** - * sha1 - */ - - u32x w0_t2[4]; - - w0_t2[0] = swap_workaround (w0_t[0] | d0[0] | s0[0]); - w0_t2[1] = swap_workaround (w0_t[1] | d0[1] | s0[1]); - w0_t2[2] = swap_workaround (w0_t[2] | d0[2] | s0[2]); - w0_t2[3] = swap_workaround (w0_t[3] | d0[3] | s0[3]); - - u32x w1_t2[4]; - - w1_t2[0] = swap_workaround (w1_t[0] | d1[0] | s1[0]); - w1_t2[1] = swap_workaround (w1_t[1] | d1[1] | s1[1]); - w1_t2[2] = swap_workaround (w1_t[2] | d1[2] | s1[2]); - w1_t2[3] = swap_workaround (w1_t[3] | d1[3] | s1[3]); - - u32x w2_t2[4]; - - w2_t2[0] = swap_workaround (w2_t[0] | d2[0] | s2[0]); - w2_t2[1] = swap_workaround (w2_t[1] | d2[1] | s2[1]); - w2_t2[2] = swap_workaround (w2_t[2] | d2[2] | s2[2]); - w2_t2[3] = swap_workaround (w2_t[3] | d2[3] | s2[3]); - - u32x w3_t2[4]; - - w3_t2[0] = swap_workaround (w3_t[0] | d3[0] | s3[0]); - w3_t2[1] = swap_workaround (w3_t[1] | d3[1] | s3[1]); - w3_t2[2] = 0; - w3_t2[3] = (1 + pw_len + domain_len + 1 + salt_len) * 8; - - u32x digest[5]; - - 
digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t2, w1_t2, w2_t2, w3_t2, digest); - - // iterations - - for (u32 i = 0; i < salt_iter; i++) - { - u32x w0_t3[4]; - - w0_t3[0] = digest[0]; - w0_t3[1] = digest[1]; - w0_t3[2] = digest[2]; - w0_t3[3] = digest[3]; - - u32x w1_t3[4]; - - w1_t3[0] = digest[4]; - w1_t3[1] = swap_workaround (salt_buf0[0]); - w1_t3[2] = swap_workaround (salt_buf0[1]); - w1_t3[3] = swap_workaround (salt_buf0[2]); - - u32x w2_t3[4]; - - w2_t3[0] = swap_workaround (salt_buf0[3]); - w2_t3[1] = swap_workaround (salt_buf1[0]); - w2_t3[2] = swap_workaround (salt_buf1[1]); - w2_t3[3] = swap_workaround (salt_buf1[2]); - - u32x w3_t3[4]; - - w3_t3[0] = swap_workaround (salt_buf1[3]); - w3_t3[1] = 0; - w3_t3[2] = 0; - w3_t3[3] = (20 + salt_len) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t3, w1_t3, w2_t3, w3_t3, digest); - } - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; 
- w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 
pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08300m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; 
- - /** - * main - */ - - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08300_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m08300s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, 
plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08400_a0.cu b/nv/m08400_a0.cu deleted file mode 100644 index a328a99..0000000 --- a/nv/m08400_a0.cu +++ /dev/null @@ -1,766 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t 
= w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 
((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 
((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - 
- pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround 
(w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - - u32x w3_t[4]; - - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 
16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le 
((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = 
swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - - u32x 
w3_t[4]; - - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - 
w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 80 * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 80 * 8; - - 
sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08400_a1.cu b/nv/m08400_a1.cu deleted file mode 100644 index 68e5704..0000000 --- a/nv/m08400_a1.cu +++ /dev/null @@ 
-1,876 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP 
(SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ 
w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t 
^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ 
wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 
0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - 
wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - - u32x w3_t[4]; - - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = 
uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 80 * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | 
uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 80 * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const 
u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - 
digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - u32x w0_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - - u32x w1_t[4]; - - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - - u32x w2_t[4]; - - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - 
w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - - u32x w3_t[4]; - - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | 
uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - 
w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git 
a/nv/m08400_a3.cu b/nv/m08400_a3.cu deleted file mode 100644 index 6b104d1..0000000 --- a/nv/m08400_a3.cu +++ /dev/null @@ -1,1013 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8_le(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8_le(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, 
E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = 
rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = 
rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = 
rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void m08400m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 
salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | 
uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 80 * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - 
w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08400s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 
digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 2]); - salt_buf0[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 4]); - salt_buf1[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 5]); - salt_buf1[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 6]); - salt_buf1[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 7]); - - u32 salt_buf2[4]; - - salt_buf2[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 8]); - salt_buf2[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[ 9]); - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = 
SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a; - u32x b; - u32x c; - u32x d; - u32x e; - - a = digest[0]; - b = digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - a = digest[0]; - b = 
digest[1]; - c = digest[2]; - d = digest[3]; - e = digest[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = uint_to_hex_lower8_le ((a >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 24) & 255) << 16; - w2_t[3] = uint_to_hex_lower8_le ((a >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((a >> 8) & 255) << 16; - w3_t[0] = uint_to_hex_lower8_le ((b >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 24) & 255) << 16; - w3_t[1] = uint_to_hex_lower8_le ((b >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((b >> 8) & 255) << 16; - w3_t[2] = uint_to_hex_lower8_le ((c >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 24) & 255) << 16; - w3_t[3] = uint_to_hex_lower8_le ((c >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((c >> 8) & 255) << 16; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = uint_to_hex_lower8_le ((d >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 24) & 255) << 16; - w0_t[1] = uint_to_hex_lower8_le ((d >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((d >> 8) & 255) << 16; - w0_t[2] = uint_to_hex_lower8_le ((e >> 16) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 24) & 255) << 16; - w0_t[3] = uint_to_hex_lower8_le ((e >> 0) & 255) << 0 - | uint_to_hex_lower8_le ((e >> 8) & 255) << 16; - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (salt_len + 40) * 8; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = digest[3]; - const u32x r1 = digest[4]; - const u32x r2 = digest[2]; - const u32x r3 = digest[1]; - - #include VECT_COMPARE_S - } -} - -extern "C" 
__global__ void __launch_bounds__ (256, 1) m08400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ 
void __launch_bounds__ (256, 1) m08400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, 
digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, 
esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, 
d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, 
hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 0 - | c_bin2asc[(lid >> 4) & 15] << 8; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08400s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08500_a0.cu b/nv/m08500_a0.cu deleted file mode 100644 index 1cd8c80..0000000 --- a/nv/m08500_a0.cu +++ /dev/null @@ -1,837 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* 
nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 
0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 
0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 
0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 
0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, 
- 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 
0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822, -}; - -#define NBOX(i,n,S) (S)[(n)][(i)] - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX 
(((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - //#pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s; - u32x t; - - #ifdef VECT_SIZE1 - s = NBOX ((( c 
>> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) -{ - - const u8 ascii_to_ebcdic_pc[256] = - { - // little hack, can't crack 0-bytes in 
password, but who cares - // 0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x2a, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x8a, 0x89, 0x8f, 0x8c, 0xd3, 0xd0, 0xce, 0xe6, 0x9b, 0x98, 0xd5, 0xe5, 0x92, 0x91, 0x97, 0x94, - 0x2a, 0x34, 0x54, 0x5d, 0x1c, 0x73, 0x0b, 0x51, 0x31, 0x10, 0x13, 0x37, 0x7c, 0x6b, 0x3d, 0x68, - 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, 0x46, 0x45, 0x5b, 0x58, 0x5e, 0x16, 0x32, 0x57, 0x76, 0x75, - 0x52, 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, - 0x04, 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x3e, 0x6b, 0x1f, 0x15, 0x70, - 0x58, 0xa8, 0xae, 0xad, 0xa2, 0xa1, 0xa7, 0xa4, 0xba, 0xb9, 0x89, 0x8f, 0x8c, 0x83, 0x80, 0x86, - 0x85, 0x9b, 0x98, 0xef, 0xec, 0xe3, 0xe0, 0xe6, 0xe5, 0xfb, 0xf8, 0x2a, 0x7f, 0x0b, 0xe9, 0xa4, - 0xea, 0xe9, 0xef, 0xec, 0xe3, 0x80, 0xa7, 0x85, 0xfb, 0xf8, 0xfe, 0xfd, 0xf2, 0xb9, 0xbf, 0x9d, - 0xcb, 0xc8, 0x9e, 0xcd, 0xc2, 0xc1, 0xc7, 0xba, 0xda, 0xd9, 0xdf, 0xdc, 0xa2, 0x83, 0xd6, 0x68, - 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, 0x04, - 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, - 0x46, 0x45, 0x5b, 0xab, 0xbf, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, 0x8a, 0x9e, 0x9d, 0x92, 0x91, 0x97, - 0x94, 0xea, 0xfe, 0xfd, 0xf2, 0xf1, 0xf7, 0xf4, 0xcb, 0xc8, 0xce, 0xcd, 0xc2, 0xc1, 0xc7, 0xc4, - 0xda, 0xd9, 0xdf, 0xdc, 0xd3, 0xd0, 0xd6, 0xd5, 0x3e, 0x3d, 0x32, 0x31, 0x37, 0x34, 0x1f, 0x1c, - 0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54, - }; - - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | 
(ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf[4]; - - pw_buf[0] = pws[gid].i[ 0]; - pw_buf[1] = pws[gid].i[ 1]; - pw_buf[2] = 0; - pw_buf[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf[0]; - w0[1] = pw_buf[1]; - w0[2] = pw_buf[2]; - w0[3] = pw_buf[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 8) ? 
8 : out_len; - - u32x key[2]; - - transform_racf_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf[4]; - - pw_buf[0] = pws[gid].i[ 0]; - pw_buf[1] = pws[gid].i[ 1]; - pw_buf[2] = 0; - pw_buf[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = 
c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf[0]; - w0[1] = pw_buf[1]; - w0[2] = pw_buf[2]; - w0[3] = pw_buf[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - out_len = (out_len >= 8) ? 8 : out_len; - - u32x key[2]; - - transform_racf_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 
loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08500_a1.cu b/nv/m08500_a1.cu deleted file mode 100644 index 3e9db6d..0000000 --- a/nv/m08500_a1.cu +++ /dev/null @@ -1,944 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - 
tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u8 ascii_to_ebcdic_pc[256] = -{ - // little hack, can't crack 0-bytes in password, but who cares - // 0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x2a, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x8a, 0x89, 0x8f, 0x8c, 0xd3, 0xd0, 0xce, 0xe6, 0x9b, 0x98, 0xd5, 0xe5, 0x92, 0x91, 0x97, 0x94, - 0x2a, 0x34, 0x54, 0x5d, 0x1c, 0x73, 0x0b, 0x51, 0x31, 0x10, 0x13, 0x37, 0x7c, 0x6b, 0x3d, 0x68, - 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, 0x46, 0x45, 0x5b, 0x58, 0x5e, 0x16, 0x32, 0x57, 0x76, 0x75, - 0x52, 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, - 0x04, 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x3e, 0x6b, 0x1f, 0x15, 0x70, - 0x58, 0xa8, 0xae, 0xad, 0xa2, 0xa1, 0xa7, 0xa4, 0xba, 0xb9, 0x89, 0x8f, 0x8c, 0x83, 0x80, 0x86, - 0x85, 0x9b, 0x98, 0xef, 0xec, 0xe3, 0xe0, 0xe6, 0xe5, 0xfb, 0xf8, 0x2a, 0x7f, 0x0b, 0xe9, 0xa4, - 0xea, 0xe9, 0xef, 0xec, 0xe3, 0x80, 0xa7, 0x85, 0xfb, 0xf8, 0xfe, 0xfd, 0xf2, 0xb9, 0xbf, 0x9d, - 0xcb, 0xc8, 0x9e, 0xcd, 0xc2, 0xc1, 0xc7, 0xba, 0xda, 0xd9, 0xdf, 0xdc, 0xa2, 0x83, 0xd6, 0x68, - 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, 0x04, - 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, - 0x46, 0x45, 0x5b, 0xab, 0xbf, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, 0x8a, 
0x9e, 0x9d, 0x92, 0x91, 0x97, - 0x94, 0xea, 0xfe, 0xfd, 0xf2, 0xf1, 0xf7, 0xf4, 0xcb, 0xc8, 0xce, 0xcd, 0xc2, 0xc1, 0xc7, 0xc4, - 0xda, 0xd9, 0xdf, 0xdc, 0xd3, 0xd0, 0xd6, 0xd5, 0x3e, 0x3d, 0x32, 0x31, 0x37, 0x34, 0x1f, 0x1c, - 0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54, -}; - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 
0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 
0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 
0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 
0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 
0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 
0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822, -}; - -#define NBOX(i,n,S) (S)[(n)][(i)] - -__device__ static void _des_crypt_encrypt (u32x iv[2], 
u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - //#pragma unroll 16 - for (u32 i 
= 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s; - u32x t; - - #ifdef VECT_SIZE1 - s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 
0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) -{ - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, 
const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - 
s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 7) ? 7 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x key[2]; - - transform_racf_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = 0; - wordl0[3] = 0; - - u32x wordl1[4]; - - wordl1[0] = 0; - wordl1[1] = 0; - wordl1[2] = 0; - wordl1[3] = 0; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * sbox, kbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - __shared__ u32 s_skb[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * main - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = 
c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - pw_len = (pw_len >= 7) ? 7 : pw_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 0; - wordr0[3] = 0; - - u32 wordr1[4]; - - wordr1[0] = 0; - wordr1[1] = 0; - wordr1[2] = 0; - wordr1[3] = 0; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = 0; - w0[3] = 0; - - u32x key[2]; - - transform_racf_key (w0[0], w0[1], key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08500_a3.cu b/nv/m08500_a3.cu deleted file mode 100644 index 541d7a8..0000000 --- a/nv/m08500_a3.cu +++ /dev/null @@ -1,863 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + 
n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u8 ascii_to_ebcdic_pc[256] = -{ - // little hack, can't crack 0-bytes in password, but who cares - // 0xab, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x2a, 0xa8, 0xae, 0xad, 0xc4, 0xf1, 0xf7, 0xf4, 0x86, 0xa1, 0xe0, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, - 0x8a, 0x89, 0x8f, 0x8c, 0xd3, 0xd0, 0xce, 0xe6, 0x9b, 0x98, 0xd5, 0xe5, 0x92, 0x91, 0x97, 0x94, - 0x2a, 0x34, 0x54, 0x5d, 0x1c, 0x73, 0x0b, 0x51, 0x31, 0x10, 0x13, 0x37, 0x7c, 0x6b, 0x3d, 0x68, - 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, 0x46, 0x45, 0x5b, 0x58, 0x5e, 0x16, 0x32, 0x57, 0x76, 0x75, - 0x52, 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, - 0x04, 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x3e, 0x6b, 0x1f, 0x15, 0x70, - 0x58, 0xa8, 0xae, 0xad, 0xa2, 0xa1, 0xa7, 0xa4, 0xba, 0xb9, 0x89, 0x8f, 0x8c, 0x83, 0x80, 0x86, - 0x85, 0x9b, 0x98, 0xef, 0xec, 0xe3, 0xe0, 0xe6, 0xe5, 0xfb, 0xf8, 0x2a, 0x7f, 0x0b, 0xe9, 0xa4, - 0xea, 0xe9, 0xef, 0xec, 0xe3, 0x80, 0xa7, 0x85, 0xfb, 0xf8, 0xfe, 0xfd, 0xf2, 0xb9, 0xbf, 0x9d, - 0xcb, 0xc8, 0x9e, 0xcd, 0xc2, 0xc1, 0xc7, 0xba, 0xda, 0xd9, 0xdf, 0xdc, 0xa2, 0x83, 0xd6, 0x68, - 0x29, 0x2f, 0x2c, 0x23, 0x20, 0x26, 0x25, 0x3b, 0x38, 0x08, 0x0e, 0x0d, 0x02, 0x01, 0x07, 0x04, - 0x1a, 0x19, 0x6e, 0x6d, 0x62, 0x61, 0x67, 0x64, 0x7a, 0x79, 0x4a, 0x49, 0x4f, 0x4c, 0x43, 0x40, - 0x46, 0x45, 0x5b, 0xab, 0xbf, 0xbc, 0xb3, 0xb0, 0xb6, 0xb5, 0x8a, 0x9e, 0x9d, 0x92, 0x91, 0x97, - 0x94, 0xea, 0xfe, 
0xfd, 0xf2, 0xf1, 0xf7, 0xf4, 0xcb, 0xc8, 0xce, 0xcd, 0xc2, 0xc1, 0xc7, 0xc4, - 0xda, 0xd9, 0xdf, 0xdc, 0xd3, 0xd0, 0xd6, 0xd5, 0x3e, 0x3d, 0x32, 0x31, 0x37, 0x34, 0x1f, 0x1c, - 0x13, 0x10, 0x16, 0x15, 0x7f, 0x7c, 0x73, 0x70, 0x76, 0x75, 0x5e, 0x5d, 0x52, 0x51, 0x57, 0x54, -}; - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x02080800, 0x00080000, 0x02000002, 0x02080802, - 0x02000000, 0x00080802, 0x00080002, 0x02000002, - 0x00080802, 0x02080800, 0x02080000, 0x00000802, - 0x02000802, 0x02000000, 0x00000000, 0x00080002, - 0x00080000, 0x00000002, 0x02000800, 0x00080800, - 0x02080802, 0x02080000, 0x00000802, 0x02000800, - 0x00000002, 0x00000800, 0x00080800, 0x02080002, - 0x00000800, 0x02000802, 0x02080002, 0x00000000, - 0x00000000, 0x02080802, 0x02000800, 0x00080002, - 0x02080800, 0x00080000, 0x00000802, 0x02000800, - 0x02080002, 0x00000800, 0x00080800, 0x02000002, - 0x00080802, 0x00000002, 0x02000002, 0x02080000, - 0x02080802, 0x00080800, 0x02080000, 0x02000802, - 0x02000000, 0x00000802, 0x00080002, 0x00000000, - 0x00080000, 0x02000000, 0x02000802, 0x02080800, - 0x00000002, 0x02080002, 0x00000800, 0x00080802, - /* nibble 1 */ - 0x40108010, 0x00000000, 0x00108000, 0x40100000, - 0x40000010, 0x00008010, 0x40008000, 0x00108000, - 0x00008000, 0x40100010, 0x00000010, 0x40008000, - 0x00100010, 0x40108000, 0x40100000, 0x00000010, - 0x00100000, 0x40008010, 0x40100010, 0x00008000, - 0x00108010, 0x40000000, 0x00000000, 0x00100010, - 0x40008010, 0x00108010, 0x40108000, 0x40000010, - 0x40000000, 0x00100000, 0x00008010, 0x40108010, - 0x00100010, 0x40108000, 0x40008000, 0x00108010, - 0x40108010, 0x00100010, 0x40000010, 0x00000000, - 0x40000000, 0x00008010, 0x00100000, 0x40100010, - 0x00008000, 0x40000000, 0x00108010, 0x40008010, - 0x40108000, 0x00008000, 0x00000000, 0x40000010, - 0x00000010, 0x40108010, 0x00108000, 0x40100000, - 0x40100010, 0x00100000, 0x00008010, 0x40008000, - 0x40008010, 0x00000010, 0x40100000, 0x00108000, - /* nibble 2 */ - 0x04000001, 
0x04040100, 0x00000100, 0x04000101, - 0x00040001, 0x04000000, 0x04000101, 0x00040100, - 0x04000100, 0x00040000, 0x04040000, 0x00000001, - 0x04040101, 0x00000101, 0x00000001, 0x04040001, - 0x00000000, 0x00040001, 0x04040100, 0x00000100, - 0x00000101, 0x04040101, 0x00040000, 0x04000001, - 0x04040001, 0x04000100, 0x00040101, 0x04040000, - 0x00040100, 0x00000000, 0x04000000, 0x00040101, - 0x04040100, 0x00000100, 0x00000001, 0x00040000, - 0x00000101, 0x00040001, 0x04040000, 0x04000101, - 0x00000000, 0x04040100, 0x00040100, 0x04040001, - 0x00040001, 0x04000000, 0x04040101, 0x00000001, - 0x00040101, 0x04000001, 0x04000000, 0x04040101, - 0x00040000, 0x04000100, 0x04000101, 0x00040100, - 0x04000100, 0x00000000, 0x04040001, 0x00000101, - 0x04000001, 0x00040101, 0x00000100, 0x04040000, - /* nibble 3 */ - 0x00401008, 0x10001000, 0x00000008, 0x10401008, - 0x00000000, 0x10400000, 0x10001008, 0x00400008, - 0x10401000, 0x10000008, 0x10000000, 0x00001008, - 0x10000008, 0x00401008, 0x00400000, 0x10000000, - 0x10400008, 0x00401000, 0x00001000, 0x00000008, - 0x00401000, 0x10001008, 0x10400000, 0x00001000, - 0x00001008, 0x00000000, 0x00400008, 0x10401000, - 0x10001000, 0x10400008, 0x10401008, 0x00400000, - 0x10400008, 0x00001008, 0x00400000, 0x10000008, - 0x00401000, 0x10001000, 0x00000008, 0x10400000, - 0x10001008, 0x00000000, 0x00001000, 0x00400008, - 0x00000000, 0x10400008, 0x10401000, 0x00001000, - 0x10000000, 0x10401008, 0x00401008, 0x00400000, - 0x10401008, 0x00000008, 0x10001000, 0x00401008, - 0x00400008, 0x00401000, 0x10400000, 0x10001008, - 0x00001008, 0x10000000, 0x10000008, 0x10401000, - /* nibble 4 */ - 0x08000000, 0x00010000, 0x00000400, 0x08010420, - 0x08010020, 0x08000400, 0x00010420, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x00010400, - 0x08000420, 0x08010020, 0x08010400, 0x00000000, - 0x00010400, 0x08000000, 0x00010020, 0x00000420, - 0x08000400, 0x00010420, 0x00000000, 0x08000020, - 0x00000020, 0x08000420, 0x08010420, 0x00010020, - 0x08010000, 0x00000400, 
0x00000420, 0x08010400, - 0x08010400, 0x08000420, 0x00010020, 0x08010000, - 0x00010000, 0x00000020, 0x08000020, 0x08000400, - 0x08000000, 0x00010400, 0x08010420, 0x00000000, - 0x00010420, 0x08000000, 0x00000400, 0x00010020, - 0x08000420, 0x00000400, 0x00000000, 0x08010420, - 0x08010020, 0x08010400, 0x00000420, 0x00010000, - 0x00010400, 0x08010020, 0x08000400, 0x00000420, - 0x00000020, 0x00010420, 0x08010000, 0x08000020, - /* nibble 5 */ - 0x80000040, 0x00200040, 0x00000000, 0x80202000, - 0x00200040, 0x00002000, 0x80002040, 0x00200000, - 0x00002040, 0x80202040, 0x00202000, 0x80000000, - 0x80002000, 0x80000040, 0x80200000, 0x00202040, - 0x00200000, 0x80002040, 0x80200040, 0x00000000, - 0x00002000, 0x00000040, 0x80202000, 0x80200040, - 0x80202040, 0x80200000, 0x80000000, 0x00002040, - 0x00000040, 0x00202000, 0x00202040, 0x80002000, - 0x00002040, 0x80000000, 0x80002000, 0x00202040, - 0x80202000, 0x00200040, 0x00000000, 0x80002000, - 0x80000000, 0x00002000, 0x80200040, 0x00200000, - 0x00200040, 0x80202040, 0x00202000, 0x00000040, - 0x80202040, 0x00202000, 0x00200000, 0x80002040, - 0x80000040, 0x80200000, 0x00202040, 0x00000000, - 0x00002000, 0x80000040, 0x80002040, 0x80202000, - 0x80200000, 0x00002040, 0x00000040, 0x80200040, - /* nibble 6 */ - 0x00004000, 0x00000200, 0x01000200, 0x01000004, - 0x01004204, 0x00004004, 0x00004200, 0x00000000, - 0x01000000, 0x01000204, 0x00000204, 0x01004000, - 0x00000004, 0x01004200, 0x01004000, 0x00000204, - 0x01000204, 0x00004000, 0x00004004, 0x01004204, - 0x00000000, 0x01000200, 0x01000004, 0x00004200, - 0x01004004, 0x00004204, 0x01004200, 0x00000004, - 0x00004204, 0x01004004, 0x00000200, 0x01000000, - 0x00004204, 0x01004000, 0x01004004, 0x00000204, - 0x00004000, 0x00000200, 0x01000000, 0x01004004, - 0x01000204, 0x00004204, 0x00004200, 0x00000000, - 0x00000200, 0x01000004, 0x00000004, 0x01000200, - 0x00000000, 0x01000204, 0x01000200, 0x00004200, - 0x00000204, 0x00004000, 0x01004204, 0x01000000, - 0x01004200, 0x00000004, 0x00004004, 
0x01004204, - 0x01000004, 0x01004200, 0x01004000, 0x00004004, - /* nibble 7 */ - 0x20800080, 0x20820000, 0x00020080, 0x00000000, - 0x20020000, 0x00800080, 0x20800000, 0x20820080, - 0x00000080, 0x20000000, 0x00820000, 0x00020080, - 0x00820080, 0x20020080, 0x20000080, 0x20800000, - 0x00020000, 0x00820080, 0x00800080, 0x20020000, - 0x20820080, 0x20000080, 0x00000000, 0x00820000, - 0x20000000, 0x00800000, 0x20020080, 0x20800080, - 0x00800000, 0x00020000, 0x20820000, 0x00000080, - 0x00800000, 0x00020000, 0x20000080, 0x20820080, - 0x00020080, 0x20000000, 0x00000000, 0x00820000, - 0x20800080, 0x20020080, 0x20020000, 0x00800080, - 0x20820000, 0x00000080, 0x00800080, 0x20020000, - 0x20820080, 0x00800000, 0x20800000, 0x20000080, - 0x00820000, 0x00020080, 0x20020080, 0x20800000, - 0x00000080, 0x20820000, 0x00820080, 0x00000000, - 0x20000000, 0x20800080, 0x00020000, 0x00820080, -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, 
- 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 
0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 
0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822, -}; - -#define NBOX(i,n,S) (S)[(n)][(i)] - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32x data[2], u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) 
-{ - u32x tt; - - u32x r = data[0]; - u32x l = data[1]; - - #pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - u32x u = Kc[i] ^ r; - u32x t = Kd[i] ^ rotl32 (r, 28u); - - #ifdef VECT_SIZE1 - l ^= NBOX (((u >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t >> 26) & 0x3f), 7, s_SPtrans); - #endif - - #ifdef VECT_SIZE2 - l.s0 ^= NBOX (((u.s0 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s0 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s0 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s0 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s0 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s0 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s0 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s0 >> 26) & 0x3f), 7, s_SPtrans); - - l.s1 ^= NBOX (((u.s1 >> 2) & 0x3f), 0, s_SPtrans) - | NBOX (((u.s1 >> 10) & 0x3f), 2, s_SPtrans) - | NBOX (((u.s1 >> 18) & 0x3f), 4, s_SPtrans) - | NBOX (((u.s1 >> 26) & 0x3f), 6, s_SPtrans) - | NBOX (((t.s1 >> 2) & 0x3f), 1, s_SPtrans) - | NBOX (((t.s1 >> 10) & 0x3f), 3, s_SPtrans) - | NBOX (((t.s1 >> 18) & 0x3f), 5, s_SPtrans) - | NBOX (((t.s1 >> 26) & 0x3f), 7, s_SPtrans); - #endif - - tt = l; - l = r; - r = tt; - } - - iv[0] = l; - iv[1] = r; -} - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - //#pragma unroll 16 - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 
2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - u32x s; - u32x t; - - #ifdef VECT_SIZE1 - s = NBOX ((( c >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c >> 6) & 0x03) - | ((c >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c >> 13) & 0x0f) - | ((c >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c >> 20) & 0x01) - | ((c >> 21) & 0x06) - | ((c >> 22) & 0x38)), 3, s_skb); - - t = NBOX ((( d >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d >> 7) & 0x03) - | ((d >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d >> 21) & 0x0f) - | ((d >> 22) & 0x30)), 7, s_skb); - #endif - - #ifdef VECT_SIZE2 - s.s0 = NBOX ((( c.s0 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s0 >> 6) & 0x03) - | ((c.s0 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s0 >> 13) & 0x0f) - | ((c.s0 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s0 >> 20) & 0x01) - | ((c.s0 >> 21) & 0x06) - | ((c.s0 >> 22) & 0x38)), 3, s_skb); - - t.s0 = NBOX ((( d.s0 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s0 >> 7) & 0x03) - | ((d.s0 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s0 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s0 >> 21) & 0x0f) - | ((d.s0 >> 22) & 0x30)), 7, s_skb); - - s.s1 = NBOX ((( c.s1 >> 0) & 0x3f), 0, s_skb) - | NBOX ((((c.s1 >> 6) & 0x03) - | ((c.s1 >> 7) & 0x3c)), 1, s_skb) - | NBOX ((((c.s1 >> 13) & 0x0f) - | ((c.s1 >> 14) & 0x30)), 2, s_skb) - | NBOX ((((c.s1 >> 20) & 0x01) - | ((c.s1 >> 21) & 0x06) - | ((c.s1 >> 22) & 0x38)), 3, s_skb); - - t.s1 = NBOX ((( d.s1 >> 0) & 0x3f), 4, s_skb) - | NBOX ((((d.s1 >> 7) & 0x03) - | ((d.s1 >> 8) & 0x3c)), 5, s_skb) - | NBOX ((((d.s1 >> 15) & 0x3f)), 6, s_skb) - | NBOX ((((d.s1 >> 21) & 0x0f) - | ((d.s1 >> 22) & 0x30)), 7, s_skb); - #endif - - #if __CUDA_ARCH__ >= 200 - Kc[i] = __byte_perm (s, t, 0x5410); - Kd[i] = __byte_perm (s, t, 0x7632); - #else - Kc[i] 
= ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - #endif - - Kc[i] = rotl32 (Kc[i], 2u); - Kd[i] = rotl32 (Kd[i], 2u); - } -} - -__device__ static void transform_racf_key (const u32x w0, const u32x w1, u32x key[2]) -{ - #ifdef VECT_SIZE1 - - key[0] = (ascii_to_ebcdic_pc[(w0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0 >> 24) & 0xff]) << 24; - - key[1] = (ascii_to_ebcdic_pc[(w1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1 >> 24) & 0xff]) << 24; - #endif - - #ifdef VECT_SIZE2 - - key[0].s0 = (ascii_to_ebcdic_pc[(w0.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s0 >> 24) & 0xff]) << 24; - - key[0].s1 = (ascii_to_ebcdic_pc[(w0.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w0.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w0.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w0.s1 >> 24) & 0xff]) << 24; - - key[1].s0 = (ascii_to_ebcdic_pc[(w1.s0 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s0 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s0 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s0 >> 24) & 0xff]) << 24; - - key[1].s1 = (ascii_to_ebcdic_pc[(w1.s1 >> 0) & 0xff]) << 0 - | (ascii_to_ebcdic_pc[(w1.s1 >> 8) & 0xff]) << 8 - | (ascii_to_ebcdic_pc[(w1.s1 >> 16) & 0xff]) << 16 - | (ascii_to_ebcdic_pc[(w1.s1 >> 24) & 0xff]) << 24; - #endif -} - -__shared__ u32 s_SPtrans[8][64]; - -__shared__ u32 s_skb[8][64]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m08500m (u32x w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - u32x w1 = w[1]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos].i; - - const u32x w0 = w0l | w0r; - - u32x key[2]; - - transform_racf_key (w0, w1, key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08500s (u32x w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[2]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - u32x w1 = w[1]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos].i; - - const u32x w0 = w0l | w0r; - - u32x key[2]; - - transform_racf_key (w0, w1, key); - - const u32x c = key[0]; - const u32x d = key[1]; - - u32x Kc[16]; - u32x Kd[16]; - - _des_crypt_keysetup (c, d, Kc, Kd, s_skb); - - u32x data[2]; - - data[0] = salt_buf0[0]; - data[1] = salt_buf0[1]; - - u32x iv[2]; - - _des_crypt_encrypt (iv, data, Kc, Kd, s_SPtrans); - - const u32x r0 = iv[0]; - const u32x r1 = iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08500m (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox, kbox - */ - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08500s (w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s08 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08600_a0.cu b/nv/m08600_a0.cu deleted file mode 100644 index 4a455be..0000000 --- a/nv/m08600_a0.cu +++ /dev/null @@ -1,555 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define 
DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 
0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32x w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; 
- w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = saved_key[0]; - block[1] = saved_key[1]; - block[2] = saved_key[2]; - block[3] = saved_key[3]; - - mdtransform (state, 
checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; 
- - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = 
pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08600_a1.cu b/nv/m08600_a1.cu deleted file mode 100644 index b53b47a..0000000 --- a/nv/m08600_a1.cu +++ /dev/null @@ -1,605 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x 
((S)[(i).x], (S)[(i).y]) -#endif - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32x w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - 
case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = saved_key[0]; - block[1] = saved_key[1]; - block[2] = saved_key[2]; - block[3] = saved_key[3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m04 
(const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = 
c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] 
= 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08600_a3.cu b/nv/m08600_a3.cu deleted file mode 100644 index a8d53b8..0000000 --- a/nv/m08600_a3.cu +++ /dev/null @@ -1,722 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 
0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 
0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32 w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] 
|= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = saved_key[0]; - block[1] = saved_key[1]; - block[2] = saved_key[2]; - block[3] = saved_key[3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m08600m (u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, 
u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_tmp[16]; - - w_tmp[ 0] = w0; - w_tmp[ 1] = w[ 1]; - w_tmp[ 2] = w[ 2]; - w_tmp[ 3] = w[ 3]; - w_tmp[ 4] = w[ 4]; - w_tmp[ 5] = w[ 5]; - w_tmp[ 6] = w[ 6]; - w_tmp[ 7] = w[ 7]; - w_tmp[ 8] = w[ 8]; - w_tmp[ 9] = w[ 9]; - w_tmp[10] = w[10]; - w_tmp[11] = w[11]; - w_tmp[12] = w[12]; - w_tmp[13] = w[13]; - w_tmp[14] = w[14]; - w_tmp[15] = w[15]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m08600s (u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_tmp[16]; - - w_tmp[ 0] = w0; - w_tmp[ 1] = w[ 1]; - w_tmp[ 2] = w[ 2]; - w_tmp[ 3] = w[ 3]; - w_tmp[ 4] = w[ 4]; - w_tmp[ 5] = w[ 5]; - w_tmp[ 6] = w[ 6]; - w_tmp[ 7] = w[ 7]; - w_tmp[ 8] = w[ 8]; - w_tmp[ 9] = w[ 9]; - w_tmp[10] = w[10]; - w_tmp[11] = w[11]; - w_tmp[12] = w[12]; - w_tmp[13] = w[13]; - w_tmp[14] = w[14]; - w_tmp[15] = w[15]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - - const u32x r0 = state[0]; - const u32x r1 = state[1]; - const u32x r2 = state[2]; - const u32x r3 = state[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m04 (const pw_t *pws, 
const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, 
const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08600s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08700_a0.cu b/nv/m08700_a0.cu deleted file mode 100644 index 26ab61d..0000000 --- a/nv/m08700_a0.cu +++ /dev/null @@ -1,723 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS6_ - -#include 
"include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 
0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= 
(in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32x w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 
s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = 0; - block[1] = 0; - block[2] = 0; - block[3] = 0; - - u32 curpos; - u32 idx; - - for (curpos = 0, idx = 0; curpos + 16 < size; curpos += 16, idx += 4) - { - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - } - - u32 left = size - curpos; - - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 
0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w[ 0] = salt0; - w[ 1] = salt1 | w0_t << 16; - w[ 2] = w0_t >> 16 | w1_t << 16; - w[ 3] = w1_t >> 16 | w2_t << 16; - w[ 4] = w2_t >> 16 | w3_t << 16; - w[ 5] = w3_t >> 16 | w4_t << 16; - w[ 6] = w4_t >> 16 | w5_t << 16; - w[ 7] = w5_t >> 16 | w6_t << 16; - w[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w[ 9] = pade; - w[10] = pade; - w[11] = pade; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 
1) m08700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; 
- w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w[16]; - - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w[ 0] = salt0; - w[ 1] = salt1 | w0_t << 16; - w[ 2] = w0_t >> 16 | w1_t << 16; - 
w[ 3] = w1_t >> 16 | w2_t << 16; - w[ 4] = w2_t >> 16 | w3_t << 16; - w[ 5] = w3_t >> 16 | w4_t << 16; - w[ 6] = w4_t >> 16 | w5_t << 16; - w[ 7] = w5_t >> 16 | w6_t << 16; - w[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w[ 9] = pade; - w[10] = pade; - w[11] = pade; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08700_a1.cu b/nv/m08700_a1.cu deleted file mode 100644 index e3a0312..0000000 --- a/nv/m08700_a1.cu +++ /dev/null @@ -1,774 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS6_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 
0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); 
tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32x w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], 
u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = 0; - block[1] = 0; - block[2] = 0; - block[3] = 0; - - u32 curpos; - u32 idx; - - for (curpos = 0, idx = 0; curpos + 16 < size; curpos += 16, idx += 4) - { - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - } - - u32 left = size - curpos; - - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const 
u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = 
c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 
- | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w[ 0] = salt0; - w[ 1] = salt1 | w0_t << 16; - w[ 2] = w0_t >> 16 | w1_t << 16; - w[ 3] = w1_t >> 16 | w2_t << 16; - w[ 4] = w2_t >> 16 | w3_t << 16; - w[ 5] = w3_t >> 16 | w4_t << 16; - w[ 6] = w4_t >> 16 | w5_t << 16; - w[ 7] = w5_t >> 16 | w6_t << 16; - w[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w[ 9] = pade; - w[10] = pade; - w[11] = pade; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - 
wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - domino_big_md (w, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 
- | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w[ 0] = salt0; - w[ 1] = salt1 | w0_t << 16; - w[ 2] = w0_t >> 16 | w1_t << 16; - w[ 3] = w1_t >> 16 | w2_t << 16; - w[ 4] = w2_t >> 16 | w3_t << 16; - w[ 5] = w3_t >> 16 | w4_t << 16; - w[ 6] = w4_t >> 16 | w5_t << 16; - w[ 7] = w5_t >> 16 | w6_t << 16; - w[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w[ 9] = pade; - w[10] = pade; - w[11] = pade; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m08700_a3.cu b/nv/m08700_a3.cu deleted file mode 100644 index 1bdcc1c..0000000 --- a/nv/m08700_a3.cu +++ /dev/null @@ -1,919 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS6_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 
0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX 
(s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] >> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32 w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - 
w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = 0; - block[1] = 0; - block[2] = 0; - block[3] = 0; - - u32 curpos; - u32 idx; - - for (curpos = 0, idx = 0; curpos + 16 < size; curpos += 16, idx += 4) - { - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - } - - u32 left = size - curpos; - - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ __constant__ u32x 
c_bfs[1024]; - -__device__ static void m08700m (u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_tmp[16]; - - w_tmp[ 0] = w0; - w_tmp[ 1] = w[ 1]; - w_tmp[ 2] = w[ 2]; - w_tmp[ 3] = w[ 3]; - w_tmp[ 4] = w[ 4]; - w_tmp[ 5] = w[ 5]; - w_tmp[ 6] = w[ 6]; - w_tmp[ 7] = w[ 7]; - w_tmp[ 8] = w[ 8]; - w_tmp[ 9] = w[ 9]; - w_tmp[10] = w[10]; - w_tmp[11] = w[11]; - w_tmp[12] = w[12]; - w_tmp[13] = w[13]; - w_tmp[14] = w[14]; - w_tmp[15] = w[15]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - 
state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w_tmp[ 0] = salt0; - w_tmp[ 1] = salt1 | w0_t << 16; - w_tmp[ 2] = w0_t >> 16 | w1_t << 16; - w_tmp[ 3] = w1_t >> 16 | w2_t << 16; - w_tmp[ 4] = w2_t >> 16 | w3_t << 16; - w_tmp[ 5] = w3_t >> 16 | w4_t << 16; - w_tmp[ 6] = w4_t >> 16 | w5_t << 16; - w_tmp[ 7] = w5_t >> 16 | w6_t << 16; - w_tmp[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w_tmp[ 9] = pade; - w_tmp[10] = pade; - w_tmp[11] = pade; - w_tmp[12] = 0; - w_tmp[13] = 0; - w_tmp[14] = 0; - w_tmp[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_M - } -} - -__device__ static 
void m08700s (u32 s_lotus_magic_table[256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * padding - */ - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - - /** - * salt - */ - - const u32 salt0 = salt_bufs[salt_pos].salt_buf[0]; - const u32 salt1 = salt_bufs[salt_pos].salt_buf[1] & 0xff | '(' << 8; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_tmp[16]; - - w_tmp[ 0] = w0; - w_tmp[ 1] = w[ 1]; - w_tmp[ 2] = w[ 2]; - w_tmp[ 3] = w[ 3]; - w_tmp[ 4] = w[ 4]; - w_tmp[ 5] = w[ 5]; - w_tmp[ 6] = w[ 6]; - w_tmp[ 7] = w[ 7]; 
- w_tmp[ 8] = w[ 8]; - w_tmp[ 9] = w[ 9]; - w_tmp[10] = w[10]; - w_tmp[11] = w[11]; - w_tmp[12] = w[12]; - w_tmp[13] = w[13]; - w_tmp[14] = w[14]; - w_tmp[15] = w[15]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - //const u32x w7_t = uint_to_hex_upper8 ((state[3] >> 16) & 255) << 0 - // | uint_to_hex_upper8 ((state[3] >> 24) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w_tmp[ 0] = salt0; - w_tmp[ 1] = salt1 | w0_t << 16; - w_tmp[ 2] = w0_t >> 16 | w1_t << 16; - w_tmp[ 3] = w1_t >> 16 | w2_t << 16; - w_tmp[ 4] = w2_t >> 16 | w3_t << 16; - w_tmp[ 5] = w3_t >> 16 | w4_t << 16; - w_tmp[ 6] = w4_t >> 16 | w5_t << 16; - w_tmp[ 7] = w5_t >> 16 | w6_t << 16; - w_tmp[ 8] = w6_t >> 16 | pade << 16; // | w7_t << 8; - w_tmp[ 9] = pade; - w_tmp[10] = pade; - w_tmp[11] = pade; - w_tmp[12] = 0; - w_tmp[13] = 0; - w_tmp[14] = 0; - w_tmp[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - - u32x a = state[0] & 0xffffffff; - u32x b = state[1] & 
0xffffffff; - u32x c = state[2] & 0x000000ff; - u32x d = state[3] & 0x00000000; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = c; - const u32x r3 = d; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, 
hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - 
s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700m (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - 
__shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) 
& 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = 
pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m08700s (s_lotus_magic_table, w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m08800.cu b/nv/m08800.cu deleted file mode 100644 index 39e88bf..0000000 --- a/nv/m08800.cu +++ /dev/null @@ -1,1963 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _ANDROIDFDE_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 
0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 
0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 
0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ 
__constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 
0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 
0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 
0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 
0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 
0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 
0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 
0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 
0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 
0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 
0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 
0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 
0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - 
-__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - - #pragma unroll 10 - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 10; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - 
s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 
>> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 
24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[40]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[41]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[42]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[43]; -} - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - u32 run = 1; - - while (run) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; 
- rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) - { - run = 0; - continue; - } - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - 
-__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 
0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 
0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) 
& 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ 
rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ 
s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 
>> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[56]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[57]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[58]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[59]; -} - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, 
f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + 
SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, 
f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void sha1_transform (const 
u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 
((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t 
^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 
((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] 
^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = 16; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = 
ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - w0[0] = salt_buf[0]; - w0[1] = salt_buf[1]; - w0[2] = salt_buf[2]; - w0[3] = salt_buf[3]; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - if (j == 1) - append_0x01_3 (w0, w1, w2, salt_len + 3); - else - append_0x02_3 (w0, w1, w2, salt_len + 3); - - append_0x80_3 (w0, w1, w2, salt_len + 4); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 0; i < 8; i += 5) - { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 
3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m08800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, androidfde_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const androidfde_t *androidfde_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32 rek[60]; - u32 rdk[60]; - - u32 data[4]; - u32 iv[4]; - u32 out[4]; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * aes init - */ - - u32x ukeyx[8]; - - ukeyx[0] = tmps[gid].out[0]; - ukeyx[1] = tmps[gid].out[1]; - 
ukeyx[2] = tmps[gid].out[2]; - ukeyx[3] = tmps[gid].out[3]; - ukeyx[4] = 0; - ukeyx[5] = 0; - ukeyx[6] = 0; - ukeyx[7] = 0; - - u32x a; - u32x b; - u32x c; - u32x d; - - /** - * aes decrypt key - */ - - data[0] = digests_buf[digests_offset].digest_buf[0]; - data[1] = digests_buf[digests_offset].digest_buf[1]; - data[2] = digests_buf[digests_offset].digest_buf[2]; - data[3] = digests_buf[digests_offset].digest_buf[3]; - - iv[0] = tmps[gid].out[4]; - iv[1] = tmps[gid].out[5]; - iv[2] = tmps[gid].out[6]; - iv[3] = tmps[gid].out[7]; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 44; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - a = out[0] ^ iv[0]; - b = out[1] ^ iv[1]; - c = out[2] ^ iv[2]; - d = out[3] ^ iv[3]; - - /** - * section AES_cbc_essiv() starting - */ - - // 1. start with simple sha256_transform - - u32x essivhash[8]; - - essivhash[0] = SHA256M_A; - essivhash[1] = SHA256M_B; - essivhash[2] = SHA256M_C; - essivhash[3] = SHA256M_D; - essivhash[4] = SHA256M_E; - essivhash[5] = SHA256M_F; - essivhash[6] = SHA256M_G; - essivhash[7] = SHA256M_H; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = a; - w0[1] = b; - w0[2] = c; - w0[3] = d; - w1[0] = 0x80000000; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 16 * 8; - - sha256_transform (w0, w1, w2, w3, essivhash); - - // check for FAT - - { - // 2. 
generate essiv based on startsector -- each 512 byte is one sector - - data[0] = 0; - data[1] = 0; - data[2] = 0; - data[3] = 0; - - ukeyx[0] = essivhash[0]; - ukeyx[1] = essivhash[1]; - ukeyx[2] = essivhash[2]; - ukeyx[3] = essivhash[3]; - ukeyx[4] = essivhash[4]; - ukeyx[5] = essivhash[5]; - ukeyx[6] = essivhash[6]; - ukeyx[7] = essivhash[7]; - - AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 essiv[4]; - - AES256_encrypt (data, essiv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - // 3. decrypt real data, xor essiv afterwards - - data[0] = androidfde_bufs[salt_pos].data[0]; - data[1] = androidfde_bufs[salt_pos].data[1]; - data[2] = androidfde_bufs[salt_pos].data[2]; - data[3] = androidfde_bufs[salt_pos].data[3]; - - iv[0] = essiv[0]; - iv[1] = essiv[1]; - iv[2] = essiv[2]; - iv[3] = essiv[3]; - - ukeyx[0] = a; - ukeyx[1] = b; - ukeyx[2] = c; - ukeyx[3] = d; - ukeyx[4] = 0; - ukeyx[5] = 0; - ukeyx[6] = 0; - ukeyx[7] = 0; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 44; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - u32x r0 = out[0] ^ iv[0]; - u32x r1 = out[1] ^ iv[1]; - u32x r2 = out[2] ^ iv[2]; - u32x r3 = out[3] ^ iv[3]; - - // rotate 3 byte (static in fat!) - - r0 = r1 << 8 | r0 >> 24; - r1 = r2 << 8 | r1 >> 24; - - // MSDOS5.0 - if ((r0 == 0x4f44534d) && (r1 == 0x302e3553)) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - // check for extfs - - { - u32 r[16]; - - // 2. generate essiv based on startsector -- each 512 byte is one sector - - // not needed because of cbc mode -- implementation flaw !!. first 16 byte are not interessting - - r[0] = 0; - r[1] = 0; - r[2] = 0; - r[3] = 0; - - // 3. 
decrypt real data - - ukeyx[0] = a; - ukeyx[1] = b; - ukeyx[2] = c; - ukeyx[3] = d; - ukeyx[4] = 0; - ukeyx[5] = 0; - ukeyx[6] = 0; - ukeyx[7] = 0; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 44; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 4; i < 16; i += 4) - { - data[0] = androidfde_bufs[salt_pos].data[256 + i + 0]; - data[1] = androidfde_bufs[salt_pos].data[256 + i + 1]; - data[2] = androidfde_bufs[salt_pos].data[256 + i + 2]; - data[3] = androidfde_bufs[salt_pos].data[256 + i + 3]; - - iv[0] = androidfde_bufs[salt_pos].data[256 + i + 0 - 4]; - iv[1] = androidfde_bufs[salt_pos].data[256 + i + 1 - 4]; - iv[2] = androidfde_bufs[salt_pos].data[256 + i + 2 - 4]; - iv[3] = androidfde_bufs[salt_pos].data[256 + i + 3 - 4]; - - AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - r[i + 0] = out[0] ^ iv[0]; - r[i + 1] = out[1] ^ iv[1]; - r[i + 2] = out[2] ^ iv[2]; - r[i + 3] = out[3] ^ iv[3]; - } - - // we need just a few swapped, because we do not access the others - r[ 5] = swap_workaround (r[ 5]); - r[ 6] = swap_workaround (r[ 6]); - r[14] = swap_workaround (r[14]); - - // superblock not on id 0 or 1 - // assumes max block size is 32MiB - // has EXT2_SUPER_MAGIC - - if ((r[5] < 2) && (r[6] < 16) && ((r[14] & 0xffff) == 0xEF53)) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); - - d_return_buf[lid] = 1; - } - } -} diff --git a/nv/m08900.cu b/nv/m08900.cu deleted file mode 100644 index e589a98..0000000 --- a/nv/m08900.cu +++ /dev/null @@ -1,1179 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SCRYPT_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include 
"include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -class uintm -{ - private: - public: - - u32 x; - u32 y; - u32 z; - u32 w; - - inline __device__ uintm (const u32 a, const u32 b, const u32 c, const u32 d) : x(a), y(b), z(c), w(d) { } - inline __device__ uintm (const u32 a) : x(a), y(a), z(a), w(a) { } - - inline __device__ uintm (void) { } - inline __device__ ~uintm (void) { } -}; - -typedef struct -{ - uintm P[64]; - -} scrypt_tmp_t; - -__device__ static uintm __byte_perm (const uintm a, const uintm b, const u32 c) -{ - return uintm (__byte_perm (a.x, b.x, c), - __byte_perm (a.y, b.y, c), - __byte_perm (a.z, b.z, c), - __byte_perm (a.w, b.w, c)); -} - -__device__ static uintm rotate (const uintm a, const unsigned int n) -{ - return uintm (rotl32 (a.x, n), - rotl32 (a.y, n), - rotl32 (a.z, n), - rotl32 (a.w, n)); -} - -inline __device__ uintm wxyz (const uintm a) { return uintm (a.w, a.x, a.y, a.z); } -inline __device__ uintm zwxy (const uintm a) { return uintm (a.z, a.w, a.x, a.y); } - -inline __device__ uintm operator << (const uintm a, const u32 b) { return uintm ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } -inline __device__ uintm operator << (const uintm a, const uintm b) { return uintm ((a.x << b.x), (a.y << b.y), (a.z << b.z), (a.w << b.w)); } - -inline __device__ uintm operator >> (const uintm a, const u32 b) { return uintm ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); } -inline __device__ uintm operator >> (const uintm a, const uintm b) { return uintm ((a.x >> b.x), (a.y >> b.y), (a.z >> b.z), (a.w >> b.w)); } - -inline __device__ uintm operator ^ (const uintm a, const u32 b) { return uintm ((a.x ^ b ), (a.y ^ b ), (a.z ^ b ), (a.w ^ b )); } -inline __device__ uintm 
operator ^ (const uintm a, const uintm b) { return uintm ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); } - -inline __device__ uintm operator | (const uintm a, const u32 b) { return uintm ((a.x | b ), (a.y | b ), (a.z | b ), (a.w | b )); } -inline __device__ uintm operator | (const uintm a, const uintm b) { return uintm ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); } - -inline __device__ uintm operator & (const uintm a, const u32 b) { return uintm ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } -inline __device__ uintm operator & (const uintm a, const uintm b) { return uintm ((a.x & b.x), (a.y & b.y), (a.z & b.z), (a.w & b.w)); } - -inline __device__ uintm operator + (const uintm a, const u32 b) { return uintm ((a.x + b ), (a.y + b ), (a.z + b ), (a.w + b )); } -inline __device__ uintm operator + (const uintm a, const uintm b) { return uintm ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); } - -inline __device__ void operator ^= (uintm &a, const u32 b) { a.x ^= b; a.y ^= b; a.z ^= b; a.w ^= b; } -inline __device__ void operator ^= (uintm &a, const uintm b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; } - -inline __device__ void operator += (uintm &a, const u32 b) { a.x += b; a.y += b; a.z += b; a.w += b; } -inline __device__ void operator += (uintm &a, const uintm b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; } - -__constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, 
SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, 
g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = 
SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -__device__ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) -{ - switch (block_len) - 
{ - case 0: - block0[0] = append[0]; - block0[1] = append[1]; - break; - - case 1: - block0[0] = block0[0] | append[0] << 8; - block0[1] = append[0] >> 24 | append[1] << 8; - block0[2] = append[1] >> 24; - break; - - case 2: - block0[0] = block0[0] | append[0] << 16; - block0[1] = append[0] >> 16 | append[1] << 16; - block0[2] = append[1] >> 16; - break; - - case 3: - block0[0] = block0[0] | append[0] << 24; - block0[1] = append[0] >> 8 | append[1] << 24; - block0[2] = append[1] >> 8; - break; - - case 4: - block0[1] = append[0]; - block0[2] = append[1]; - break; - - case 5: - block0[1] = block0[1] | append[0] << 8; - block0[2] = append[0] >> 24 | append[1] << 8; - block0[3] = append[1] >> 24; - break; - - case 6: - block0[1] = block0[1] | append[0] << 16; - block0[2] = append[0] >> 16 | append[1] << 16; - block0[3] = append[1] >> 16; - break; - - case 7: - block0[1] = block0[1] | append[0] << 24; - block0[2] = append[0] >> 8 | append[1] << 24; - block0[3] = append[1] >> 8; - break; - - case 8: - block0[2] = append[0]; - block0[3] = append[1]; - break; - - case 9: - block0[2] = block0[2] | append[0] << 8; - block0[3] = append[0] >> 24 | append[1] << 8; - block1[0] = append[1] >> 24; - break; - - case 10: - block0[2] = block0[2] | append[0] << 16; - block0[3] = append[0] >> 16 | append[1] << 16; - block1[0] = append[1] >> 16; - break; - - case 11: - block0[2] = block0[2] | append[0] << 24; - block0[3] = append[0] >> 8 | append[1] << 24; - block1[0] = append[1] >> 8; - break; - - case 12: - block0[3] = append[0]; - block1[0] = append[1]; - break; - - case 13: - block0[3] = block0[3] | append[0] << 8; - block1[0] = append[0] >> 24 | append[1] << 8; - block1[1] = append[1] >> 24; - break; - - case 14: - block0[3] = block0[3] | append[0] << 16; - block1[0] = append[0] >> 16 | append[1] << 16; - block1[1] = append[1] >> 16; - break; - - case 15: - block0[3] = block0[3] | append[0] << 24; - block1[0] = append[0] >> 8 | append[1] << 24; - block1[1] = append[1] >> 8; - 
break; - - case 16: - block1[0] = append[0]; - block1[1] = append[1]; - break; - - case 17: - block1[0] = block1[0] | append[0] << 8; - block1[1] = append[0] >> 24 | append[1] << 8; - block1[2] = append[1] >> 24; - break; - - case 18: - block1[0] = block1[0] | append[0] << 16; - block1[1] = append[0] >> 16 | append[1] << 16; - block1[2] = append[1] >> 16; - break; - - case 19: - block1[0] = block1[0] | append[0] << 24; - block1[1] = append[0] >> 8 | append[1] << 24; - block1[2] = append[1] >> 8; - break; - - case 20: - block1[1] = append[0]; - block1[2] = append[1]; - break; - - case 21: - block1[1] = block1[1] | append[0] << 8; - block1[2] = append[0] >> 24 | append[1] << 8; - block1[3] = append[1] >> 24; - break; - - case 22: - block1[1] = block1[1] | append[0] << 16; - block1[2] = append[0] >> 16 | append[1] << 16; - block1[3] = append[1] >> 16; - break; - - case 23: - block1[1] = block1[1] | append[0] << 24; - block1[2] = append[0] >> 8 | append[1] << 24; - block1[3] = append[1] >> 8; - break; - - case 24: - block1[2] = append[0]; - block1[3] = append[1]; - break; - - case 25: - block1[2] = block1[2] | append[0] << 8; - block1[3] = append[0] >> 24 | append[1] << 8; - block2[0] = append[1] >> 24; - break; - - case 26: - block1[2] = block1[2] | append[0] << 16; - block1[3] = append[0] >> 16 | append[1] << 16; - block2[0] = append[1] >> 16; - break; - - case 27: - block1[2] = block1[2] | append[0] << 24; - block1[3] = append[0] >> 8 | append[1] << 24; - block2[0] = append[1] >> 8; - break; - - case 28: - block1[3] = append[0]; - block2[0] = append[1]; - break; - - case 29: - block1[3] = block1[3] | append[0] << 8; - block2[0] = append[0] >> 24 | append[1] << 8; - block2[1] = append[1] >> 24; - break; - - case 30: - block1[3] = block1[3] | append[0] << 16; - block2[0] = append[0] >> 16 | append[1] << 16; - block2[1] = append[1] >> 16; - break; - - case 31: - block1[3] = block1[3] | append[0] << 24; - block2[0] = append[0] >> 8 | append[1] << 24; - block2[1] = 
append[1] >> 8; - break; - - case 32: - block2[0] = append[0]; - block2[1] = append[1]; - break; - - case 33: - block2[0] = block2[0] | append[0] << 8; - block2[1] = append[0] >> 24 | append[1] << 8; - block2[2] = append[1] >> 24; - break; - - case 34: - block2[0] = block2[0] | append[0] << 16; - block2[1] = append[0] >> 16 | append[1] << 16; - block2[2] = append[1] >> 16; - break; - - case 35: - block2[0] = block2[0] | append[0] << 24; - block2[1] = append[0] >> 8 | append[1] << 24; - block2[2] = append[1] >> 8; - break; - - case 36: - block2[1] = append[0]; - block2[2] = append[1]; - break; - - case 37: - block2[1] = block2[1] | append[0] << 8; - block2[2] = append[0] >> 24 | append[1] << 8; - block2[3] = append[1] >> 24; - break; - - case 38: - block2[1] = block2[1] | append[0] << 16; - block2[2] = append[0] >> 16 | append[1] << 16; - block2[3] = append[1] >> 16; - break; - - case 39: - block2[1] = block2[1] | append[0] << 24; - block2[2] = append[0] >> 8 | append[1] << 24; - block2[3] = append[1] >> 8; - break; - - case 40: - block2[2] = append[0]; - block2[3] = append[1]; - break; - - case 41: - block2[2] = block2[2] | append[0] << 8; - block2[3] = append[0] >> 24 | append[1] << 8; - block3[0] = append[1] >> 24; - break; - - case 42: - block2[2] = block2[2] | append[0] << 16; - block2[3] = append[0] >> 16 | append[1] << 16; - block3[0] = append[1] >> 16; - break; - - case 43: - block2[2] = block2[2] | append[0] << 24; - block2[3] = append[0] >> 8 | append[1] << 24; - block3[0] = append[1] >> 8; - break; - - case 44: - block2[3] = append[0]; - block3[0] = append[1]; - break; - - case 45: - block2[3] = block2[3] | append[0] << 8; - block3[0] = append[0] >> 24 | append[1] << 8; - block3[1] = append[1] >> 24; - break; - - case 46: - block2[3] = block2[3] | append[0] << 16; - block3[0] = append[0] >> 16 | append[1] << 16; - block3[1] = append[1] >> 16; - break; - - case 47: - block2[3] = block2[3] | append[0] << 24; - block3[0] = append[0] >> 8 | append[1] << 24; - 
block3[1] = append[1] >> 8; - break; - - case 48: - block3[0] = append[0]; - block3[1] = append[1]; - break; - - case 49: - block3[0] = block3[0] | append[0] << 8; - block3[1] = append[0] >> 24 | append[1] << 8; - block3[2] = append[1] >> 24; - break; - - case 50: - block3[0] = block3[0] | append[0] << 16; - block3[1] = append[0] >> 16 | append[1] << 16; - block3[2] = append[1] >> 16; - break; - - case 51: - block3[0] = block3[0] | append[0] << 24; - block3[1] = append[0] >> 8 | append[1] << 24; - block3[2] = append[1] >> 8; - break; - - case 52: - block3[1] = append[0]; - block3[2] = append[1]; - break; - - case 53: - block3[1] = block3[1] | append[0] << 8; - block3[2] = append[0] >> 24 | append[1] << 8; - block3[3] = append[1] >> 24; - break; - - case 54: - block3[1] = block3[1] | append[0] << 16; - block3[2] = append[0] >> 16 | append[1] << 16; - block3[3] = append[1] >> 16; - break; - - case 55: - block3[1] = block3[1] | append[0] << 24; - block3[2] = append[0] >> 8 | append[1] << 24; - block3[3] = append[1] >> 8; - break; - - case 56: - block3[2] = append[0]; - block3[3] = append[1]; - break; - } -} - -__device__ static uintm swap_workaround (uintm v) -{ - return __byte_perm (v, 0, 0x0123); -} - -#define GET_SCRYPT_CNT(r,p) (2 * (r) * 16 * (p)) -#define GET_SMIX_CNT(r,N) (2 * (r) * 16 * (N)) -#define GET_STATE_CNT(r) (2 * (r) * 16) - -#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); - -#define SALSA20_2R() \ -{ \ - ADD_ROTATE_XOR (X1, X0, X3, 7); \ - ADD_ROTATE_XOR (X2, X1, X0, 9); \ - ADD_ROTATE_XOR (X3, X2, X1, 13); \ - ADD_ROTATE_XOR (X0, X3, X2, 18); \ - \ - X1 = uintm (X1.w, X1.x, X1.y, X1.z); \ - X2 = uintm (X2.z, X2.w, X2.x, X2.y); \ - X3 = uintm (X3.y, X3.z, X3.w, X3.x); \ - \ - ADD_ROTATE_XOR (X3, X0, X1, 7); \ - ADD_ROTATE_XOR (X2, X3, X0, 9); \ - ADD_ROTATE_XOR (X1, X2, X3, 13); \ - ADD_ROTATE_XOR (X0, X1, X2, 18); \ - \ - X1 = uintm (X1.y, X1.z, X1.w, X1.x); \ - X2 = uintm (X2.z, X2.w, X2.x, X2.y); \ - X3 = uintm (X3.w, X3.x, 
X3.y, X3.z); \ -} - -#define SALSA20_8_XOR() \ -{ \ - R0 = R0 ^ Y0; \ - R1 = R1 ^ Y1; \ - R2 = R2 ^ Y2; \ - R3 = R3 ^ Y3; \ - \ - uintm X0 = R0; \ - uintm X1 = R1; \ - uintm X2 = R2; \ - uintm X3 = R3; \ - \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - SALSA20_2R (); \ - \ - R0 = R0 + X0; \ - R1 = R1 + X1; \ - R2 = R2 + X2; \ - R3 = R3 + X3; \ -} - -__device__ static void salsa_r (uintm T[8], const u32 r) -{ - const u32 state_cnt = GET_STATE_CNT (r); - - const u32 state_cnt4 = state_cnt / 4; - - uintm R0 = T[state_cnt4 - 4]; - uintm R1 = T[state_cnt4 - 3]; - uintm R2 = T[state_cnt4 - 2]; - uintm R3 = T[state_cnt4 - 1]; - - for (u32 i = 0; i < state_cnt4; i += 8) - { - uintm Y0; - uintm Y1; - uintm Y2; - uintm Y3; - - Y0 = T[i + 0]; - Y1 = T[i + 1]; - Y2 = T[i + 2]; - Y3 = T[i + 3]; - - SALSA20_8_XOR (); - - T[i + 0] = R0; - T[i + 1] = R1; - T[i + 2] = R2; - T[i + 3] = R3; - - Y0 = T[i + 4]; - Y1 = T[i + 5]; - Y2 = T[i + 6]; - Y3 = T[i + 7]; - - SALSA20_8_XOR (); - - T[i + 4] = R0; - T[i + 5] = R1; - T[i + 6] = R2; - T[i + 7] = R3; - } - - #define exchg(x,y) { const uintm t = T[(x)]; T[(x)] = T[(y)]; T[(y)] = t; } - - #define exchg4(x,y) \ - { \ - const u32 x4 = (x) * 4; \ - const u32 y4 = (y) * 4; \ - \ - exchg (x4 + 0, y4 + 0); \ - exchg (x4 + 1, y4 + 1); \ - exchg (x4 + 2, y4 + 2); \ - exchg (x4 + 3, y4 + 3); \ - } - - for (u32 i = 1; i < r / 1; i++) - { - const u32 x = i * 1; - const u32 y = i * 2; - - exchg4 (x, y); - } - - for (u32 i = 1; i < r / 2; i++) - { - const u32 x = i * 1; - const u32 y = i * 2; - - const u32 xr1 = (r * 2) - 1 - x; - const u32 yr1 = (r * 2) - 1 - y; - - exchg4 (xr1, yr1); - } -} - -__device__ static void scrypt_smix (uintm *X, uintm *T, const u32 N, const u32 r, const u32 tmto, const u32 phy, uintm *V) -{ - const u32 state_cnt = GET_STATE_CNT (r); - - const u32 state_cnt4 = state_cnt / 4; - - #if __CUDA_ARCH__ >= 500 - #define Coord(x,y,z) (((y) * zSIZE) + ((x) * zSIZE * ySIZE) + (z)) - #define CO Coord(x,y,z) - #else - 
#define Coord(x,y,z) (((x) * zSIZE) + ((y) * zSIZE * xSIZE) + (z)) - #define CO Coord(x,y,z) - #endif - - const u32 xSIZE = phy; - const u32 ySIZE = N / tmto; - const u32 zSIZE = state_cnt4; - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - const u32 x = gid % xSIZE; - - for (u32 i = 0; i < state_cnt4; i += 4) - { - T[0] = uintm (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); - T[1] = uintm (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); - T[2] = uintm (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); - T[3] = uintm (X[i + 3].x, X[i + 0].y, X[i + 1].z, X[i + 2].w); - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } - - for (u32 y = 0; y < ySIZE; y++) - { - for (u32 z = 0; z < zSIZE; z++) V[CO] = X[z]; - - for (u32 i = 0; i < tmto; i++) salsa_r (X, r); - } - - for (u32 i = 0; i < N; i++) - { - const u32 k = X[zSIZE - 4].x & (N - 1); - - const u32 y = k / tmto; - - const u32 km = k - (y * tmto); - - for (u32 z = 0; z < zSIZE; z++) T[z] = V[CO]; - - for (u32 i = 0; i < km; i++) salsa_r (T, r); - - for (u32 z = 0; z < zSIZE; z++) X[z] ^= T[z]; - - salsa_r (X, r); - } - - for (u32 i = 0; i < state_cnt4; i += 4) - { - T[0] = uintm (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); - T[1] = uintm (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); - T[2] = uintm (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); - T[3] = uintm (X[i + 3].x, X[i + 2].y, X[i + 1].z, X[i + 0].w); - - X[i + 0] = T[0]; - X[i + 1] = T[1]; - X[i + 2] = T[2]; - X[i + 3] = T[3]; - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m08900_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, scrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, 
u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, uintm *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * memory buffers - */ - - const u32 scrypt_r = SCRYPT_R; - const u32 scrypt_p = SCRYPT_P; - //const u32 scrypt_N = SCRYPT_N; - - //const u32 state_cnt = GET_STATE_CNT (scrypt_r); - const u32 scrypt_cnt = GET_SCRYPT_CNT (scrypt_r, scrypt_p); - //const u32 smix_cnt = GET_SMIX_CNT (scrypt_r, scrypt_N); - - /** - * 1st pbkdf2, creates B - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround 
(w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32 ipad[8]; - u32 opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - for (u32 i = 0, j = 0, k = 0; i < scrypt_cnt; i += 8, j += 1, k += 2) - { - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - u32 append[2]; - - append[0] = swap_workaround (j + 1); - append[1] = 0x80; - - memcat8 (w0, w1, w2, w3, salt_len, append); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); - - const uintm tmp0 = uintm (digest[0], digest[1], digest[2], digest[3]); - const uintm tmp1 = uintm (digest[4], digest[5], digest[6], digest[7]); - - __syncthreads (); - - tmps[gid].P[k + 0] = tmp0; - tmps[gid].P[k + 1] = tmp1; - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m08900_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
scrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, uintm *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 scrypt_phy = salt_bufs[salt_pos].scrypt_phy; - - const u32 state_cnt = GET_STATE_CNT (SCRYPT_R); - const u32 scrypt_cnt = GET_SCRYPT_CNT (SCRYPT_R, SCRYPT_P); - - const u32 state_cnt4 = state_cnt / 4; - const u32 scrypt_cnt4 = scrypt_cnt / 4; - - uintm X[state_cnt4]; - uintm T[state_cnt4]; - - for (int z = 0; z < state_cnt4; z++) X[z] = swap_workaround (tmps[gid].P[z]); - - scrypt_smix (X, T, SCRYPT_N, SCRYPT_R, SCRYPT_TMTO, scrypt_phy, d_scryptV_buf); - - for (int z = 0; z < state_cnt4; z++) tmps[gid].P[z] = swap_workaround (X[z]); - - #if SCRYPT_P >= 1 - for (int i = state_cnt4; i < scrypt_cnt4; i += state_cnt4) - { - for (int z = 0; z < state_cnt4; z++) X[z] = swap_workaround (tmps[gid].P[i + z]); - - scrypt_smix (X, T, SCRYPT_N, SCRYPT_R, SCRYPT_TMTO, scrypt_phy, d_scryptV_buf); - - for (int z = 0; z < state_cnt4; z++) tmps[gid].P[i + z] = swap_workaround (X[z]); - } - #endif -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m08900_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, scrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, uintm *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x;; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * memory buffers - */ - - const u32 scrypt_r = SCRYPT_R; - const u32 scrypt_p = SCRYPT_P; - - const u32 scrypt_cnt = GET_SCRYPT_CNT (scrypt_r, scrypt_p); - - const u32 scrypt_cnt4 = scrypt_cnt / 4; - - /** - * 2nd pbkdf2, creates B - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32 ipad[8]; - u32 opad[8]; - - 
hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - for (u32 l = 0; l < scrypt_cnt4; l += 4) - { - __syncthreads (); - - uintm tmp; - - tmp = tmps[gid].P[l + 0]; - - w0[0] = tmp.x; - w0[1] = tmp.y; - w0[2] = tmp.z; - w0[3] = tmp.w; - - tmp = tmps[gid].P[l + 1]; - - w1[0] = tmp.x; - w1[1] = tmp.y; - w1[2] = tmp.z; - w1[3] = tmp.w; - - tmp = tmps[gid].P[l + 2]; - - w2[0] = tmp.x; - w2[1] = tmp.y; - w2[2] = tmp.z; - w2[3] = tmp.w; - - tmp = tmps[gid].P[l + 3]; - - w3[0] = tmp.x; - w3[1] = tmp.y; - w3[2] = tmp.z; - w3[3] = tmp.w; - - sha256_transform (w0, w1, w2, w3, ipad); - } - - w0[0] = 0x00000001; - w0[1] = 0x80000000; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + (scrypt_cnt * 4) + 4) * 8; - - u32x digest[8]; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, digest); - - const u32x r0 = swap_workaround (digest[DGST_R0]); - const u32x r1 = swap_workaround (digest[DGST_R1]); - const u32x r2 = swap_workaround (digest[DGST_R2]); - const u32x r3 = swap_workaround (digest[DGST_R3]); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m09000.cu b/nv/m09000.cu deleted file mode 100644 index 7b10acb..0000000 --- a/nv/m09000.cu +++ /dev/null @@ -1,821 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PSAFE2_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -// http://www.schneier.com/code/constants.txt - -__device__ __constant__ u32 c_sbox0[256] = -{ - 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, - 0xb8e1afed, 0x6a267e96, 0xba7c9045, 
0xf12c7f99, - 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, - 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, - 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, - 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, - 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, - 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, - 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, - 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, - 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, - 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, - 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, - 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, - 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, - 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, - 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, - 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, - 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, - 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, - 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, - 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, - 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, - 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, - 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, - 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, - 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, - 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, - 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, - 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, - 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, - 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, - 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, - 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, - 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, - 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, - 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, - 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, - 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, - 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, - 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, - 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 
0x2dd1d35b, - 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, - 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, - 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, - 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, - 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, - 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, - 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, - 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, - 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, - 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, - 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, - 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, - 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, - 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, - 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, - 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, - 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, - 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, - 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, - 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, - 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, - 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a -}; - -__device__ __constant__ u32 c_sbox1[256] = -{ - 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, - 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, - 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, - 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, - 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, - 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, - 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, - 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, - 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, - 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, - 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, - 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, - 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, - 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, - 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, - 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, - 0x4e548b38, 0x4f6db908, 
0x6f420d03, 0xf60a04bf, - 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, - 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, - 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, - 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, - 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, - 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, - 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, - 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, - 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, - 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, - 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, - 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, - 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, - 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, - 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, - 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, - 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, - 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, - 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, - 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, - 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, - 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, - 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, - 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, - 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, - 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, - 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, - 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, - 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, - 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, - 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, - 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, - 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, - 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, - 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, - 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, - 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, - 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, - 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, - 0x3372f092, 0x8d937e41, 
0xd65fecf1, 0x6c223bdb, - 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, - 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, - 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, - 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, - 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, - 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, - 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7 -}; - -__device__ __constant__ u32 c_sbox2[256] = -{ - 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, - 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, - 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, - 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, - 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, - 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, - 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, - 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, - 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, - 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, - 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, - 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, - 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, - 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, - 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, - 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, - 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, - 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, - 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, - 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, - 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, - 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, - 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, - 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, - 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, - 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, - 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, - 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, - 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, - 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, - 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, - 0xf1290dc7, 
0xcc00ffa3, 0xb5390f92, 0x690fed0b, - 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, - 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, - 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, - 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, - 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, - 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, - 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, - 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, - 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, - 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, - 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, - 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, - 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, - 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, - 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, - 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, - 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, - 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, - 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, - 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, - 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, - 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, - 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, - 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, - 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, - 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, - 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, - 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, - 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, - 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, - 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, - 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0 -}; - -__device__ __constant__ u32 c_sbox3[256] = -{ - 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, - 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, - 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, - 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, - 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, - 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, - 
0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, - 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, - 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, - 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, - 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, - 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, - 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, - 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, - 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, - 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, - 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, - 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, - 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, - 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, - 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, - 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, - 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, - 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, - 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, - 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, - 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, - 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, - 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, - 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, - 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, - 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, - 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, - 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, - 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, - 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, - 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, - 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, - 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, - 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, - 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, - 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, - 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, - 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, - 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, - 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, - 
0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, - 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, - 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, - 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, - 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, - 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, - 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, - 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, - 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, - 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, - 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, - 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, - 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, - 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, - 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, - 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, - 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, - 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 -}; - -__device__ __constant__ u32 c_pbox[18] = -{ - 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, - 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, - 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, - 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, - 0x9216d5d9, 0x8979fb1b -}; - -#define BF_ROUND(L,R,N) \ -{ \ - u32x tmp; \ - \ - tmp = S0[((L) >> 24) & 0xff]; \ - tmp += S1[((L) >> 16) & 0xff]; \ - tmp ^= S2[((L) >> 8) & 0xff]; \ - tmp += S3[((L) >> 0) & 0xff]; \ - \ - (R) ^= tmp ^ P[(N)]; \ -} - -#define BF_ENCRYPT(L,R) \ -{ \ - u32x tmp; \ - \ - L ^= P[0]; \ - BF_ROUND (L, R, 1); \ - BF_ROUND (R, L, 2); \ - BF_ROUND (L, R, 3); \ - BF_ROUND (R, L, 4); \ - BF_ROUND (L, R, 5); \ - BF_ROUND (R, L, 6); \ - BF_ROUND (L, R, 7); \ - BF_ROUND (R, L, 8); \ - BF_ROUND (L, R, 9); \ - BF_ROUND (R, L, 10); \ - BF_ROUND (L, R, 11); \ - BF_ROUND (R, L, 12); \ - BF_ROUND (L, R, 13); \ - BF_ROUND (R, L, 14); \ - BF_ROUND (L, R, 15); \ - BF_ROUND (R, L, 16); \ - tmp = R; \ - R = L; \ - L = tmp ^ P[17]; \ -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], 
const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP 
(SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP 
(SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); 
SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m09000_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 lid = threadIdx.x; - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - /** - * initial sha1 - */ - - w3[1] = w2[3] << 16 | w2[2] >> 16; - w3[0] = w2[2] << 16 | w2[1] >> 16; - w2[3] = w2[1] << 16 | w2[0] >> 16; - w2[2] = w2[0] << 16 | w1[3] >> 16; - w2[1] = w1[3] << 16 | w1[2] >> 16; - w2[0] = w1[2] << 16 | w1[1] >> 16; - w1[3] = w1[1] << 16 | w1[0] >> 16; - w1[2] = w1[0] << 16 | w0[3] >> 16; - w1[1] = w0[3] << 16 | w0[2] >> 16; - w1[0] = w0[2] << 16 | w0[1] >> 16; - w0[3] = w0[1] << 16 | w0[0] >> 16; - w0[2] = w0[0] << 16; - w0[1] = salt_buf[1]; - w0[0] = salt_buf[0]; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = 
swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - - const u32 block_len = salt_len + 2 + pw_len; - - w3[2] = 0; - w3[3] = block_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - /** - * blowfish setkey - */ - - u32 P[18]; - - for (u32 i = 0; i < 18; i++) - { - P[i] = c_pbox[i]; - } - - __shared__ u32x S0_all[8][256]; - __shared__ u32x S1_all[8][256]; - __shared__ u32x S2_all[8][256]; - __shared__ u32x S3_all[8][256]; - - u32x *S0 = S0_all[lid]; - u32x *S1 = S1_all[lid]; - u32x *S2 = S2_all[lid]; - u32x *S3 = S3_all[lid]; - - for (u32 i = 0; i < 256; i++) - { - S0[i] = c_sbox0[i]; - S1[i] = c_sbox1[i]; - S2[i] = c_sbox2[i]; - S3[i] = c_sbox3[i]; - } - - for (u32 i = 0; i < 18; i++) - { - P[i] ^= digest[i % 5]; - } - - u32 L0 = 0; - u32 R0 = 0; - - for (u32 i = 0; i < 18; i += 2) - { - BF_ENCRYPT (L0, R0); - - P[i + 0] = L0; - P[i + 1] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - BF_ENCRYPT (L0, R0); - - S0[i + 0] = L0; - S0[i + 1] = R0; - - BF_ENCRYPT (L0, R0); - - S0[i + 2] = L0; - S0[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - BF_ENCRYPT (L0, R0); - - S1[i + 0] = L0; - S1[i + 1] = R0; - - BF_ENCRYPT (L0, R0); - - S1[i + 2] = L0; - S1[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - BF_ENCRYPT (L0, R0); - - S2[i + 0] = L0; - S2[i + 1] = R0; - - BF_ENCRYPT (L0, R0); - - S2[i + 2] = L0; - S2[i + 3] = R0; - } - - for (u32 i = 0; i < 256; i += 4) - { - BF_ENCRYPT (L0, R0); - - S3[i + 0] = L0; - S3[i + 1] = R0; - - BF_ENCRYPT (L0, R0); - - S3[i + 2] = L0; - S3[i + 3] = R0; - } - - // store - - tmps[gid].digest[0] = salt_buf[0]; - tmps[gid].digest[1] = salt_buf[1]; - - for (u32 i = 0; i < 18; i++) - { - tmps[gid].P[i] = P[i]; - } - - for (u32 i 
= 0; i < 256; i++) - { - tmps[gid].S0[i] = S0[i]; - tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; - } -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m09000_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - // load - - u32 digest[2]; - - digest[0] = tmps[gid].digest[0]; - digest[1] = tmps[gid].digest[1]; - - u32x P[18]; - - for (u32 i = 0; i < 18; i++) - { - P[i] = tmps[gid].P[i]; - } - - __shared__ u32x S0_all[8][256]; - __shared__ u32x S1_all[8][256]; - __shared__ u32x S2_all[8][256]; - __shared__ u32x S3_all[8][256]; - - u32x *S0 = S0_all[lid]; - u32x *S1 = S1_all[lid]; - u32x *S2 = S2_all[lid]; - u32x *S3 = S3_all[lid]; - - for (u32 i = 0; i < 256; i++) - { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; - } - - // loop - - u32x L0 = digest[0]; - u32x R0 = digest[1]; - - for (u32 i = 0; i < loop_cnt; i++) - { - BF_ENCRYPT (L0, R0); - } - - // store - - tmps[gid].digest[0] = L0; - tmps[gid].digest[1] = R0; -} - -extern "C" __global__ void __launch_bounds__ (8, 1) m09000_comp (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pwsafe2_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - // load - - u32 digest[2]; - - digest[0] = tmps[gid].digest[0]; - digest[1] = tmps[gid].digest[1]; - - // final sha1 - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = swap_workaround (digest[0]); - w0[1] = swap_workaround (digest[1]); - w0[2] = 0x00008000; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (8 + 2) * 8; - - u32x out[5]; - - out[0] = 0; // yep, not a bug! 
context is zero here - out[1] = 0; - out[2] = 0; - out[3] = 0; - out[4] = 0; - - sha1_transform (w0, w1, w2, w3, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m09100.cu b/nv/m09100.cu deleted file mode 100644 index e2e9380..0000000 --- a/nv/m09100.cu +++ /dev/null @@ -1,989 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _LOTUS8_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#undef _SHA1_ - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 lotus_magic_table[256] = -{ - 0xbd, 0x56, 0xea, 0xf2, 0xa2, 0xf1, 0xac, 0x2a, - 0xb0, 0x93, 0xd1, 0x9c, 0x1b, 0x33, 0xfd, 0xd0, - 0x30, 0x04, 0xb6, 0xdc, 0x7d, 0xdf, 0x32, 0x4b, - 0xf7, 0xcb, 0x45, 0x9b, 0x31, 0xbb, 0x21, 0x5a, - 0x41, 0x9f, 0xe1, 0xd9, 0x4a, 0x4d, 0x9e, 0xda, - 0xa0, 0x68, 0x2c, 0xc3, 0x27, 0x5f, 0x80, 0x36, - 0x3e, 0xee, 0xfb, 0x95, 0x1a, 0xfe, 0xce, 0xa8, - 0x34, 0xa9, 0x13, 0xf0, 0xa6, 0x3f, 0xd8, 0x0c, - 0x78, 0x24, 0xaf, 0x23, 0x52, 0xc1, 0x67, 0x17, - 0xf5, 0x66, 0x90, 0xe7, 0xe8, 0x07, 0xb8, 0x60, - 0x48, 0xe6, 0x1e, 0x53, 0xf3, 0x92, 0xa4, 0x72, - 0x8c, 0x08, 0x15, 0x6e, 0x86, 0x00, 0x84, 0xfa, - 0xf4, 0x7f, 0x8a, 0x42, 0x19, 0xf6, 0xdb, 0xcd, - 0x14, 0x8d, 0x50, 0x12, 0xba, 0x3c, 0x06, 0x4e, - 0xec, 0xb3, 0x35, 0x11, 0xa1, 0x88, 0x8e, 0x2b, - 0x94, 0x99, 0xb7, 0x71, 0x74, 0xd3, 0xe4, 0xbf, - 0x3a, 0xde, 0x96, 0x0e, 0xbc, 0x0a, 0xed, 0x77, - 0xfc, 0x37, 0x6b, 0x03, 0x79, 0x89, 0x62, 0xc6, - 0xd7, 0xc0, 0xd2, 0x7c, 0x6a, 
0x8b, 0x22, 0xa3, - 0x5b, 0x05, 0x5d, 0x02, 0x75, 0xd5, 0x61, 0xe3, - 0x18, 0x8f, 0x55, 0x51, 0xad, 0x1f, 0x0b, 0x5e, - 0x85, 0xe5, 0xc2, 0x57, 0x63, 0xca, 0x3d, 0x6c, - 0xb4, 0xc5, 0xcc, 0x70, 0xb2, 0x91, 0x59, 0x0d, - 0x47, 0x20, 0xc8, 0x4f, 0x58, 0xe0, 0x01, 0xe2, - 0x16, 0x38, 0xc4, 0x6f, 0x3b, 0x0f, 0x65, 0x46, - 0xbe, 0x7e, 0x2d, 0x7b, 0x82, 0xf9, 0x40, 0xb5, - 0x1d, 0x73, 0xf8, 0xeb, 0x26, 0xc7, 0x87, 0x97, - 0x25, 0x54, 0xb1, 0x28, 0xaa, 0x98, 0x9d, 0xa5, - 0x64, 0x6d, 0x7a, 0xd4, 0x10, 0x81, 0x44, 0xef, - 0x49, 0xd6, 0xae, 0x2e, 0xdd, 0x76, 0x5c, 0x2f, - 0xa7, 0x1c, 0xc9, 0x09, 0x69, 0x9a, 0x83, 0xcf, - 0x29, 0x39, 0xb9, 0xe9, 0x4c, 0xff, 0x43, 0xab, -}; - -#ifdef VECT_SIZE1 -#define BOX(S,i) u32x ((S)[(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(S,i) u32x ((S)[(i).x], (S)[(i).y]) -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -__device__ static void lotus_mix (u32x *in, u32 s_lotus_magic_table[256]) -{ - u32x p = 0; - - for (int i = 0; i < 18; i++) - { - u32 s = 48; - - #pragma unroll 12 - for (int j = 0; j < 12; j++) - { - u32x tmp_in = in[j]; - u32x tmp_out = 0; - - p = (p + s--) & 0xff; p = ((tmp_in >> 0) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 0; - p = (p + s--) & 0xff; p = ((tmp_in >> 8) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 8; - p = (p + s--) & 0xff; p = ((tmp_in >> 16) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 16; - p = (p + s--) & 0xff; p = ((tmp_in >> 24) & 0xff) ^ BOX (s_lotus_magic_table, p); tmp_out |= p << 24; - - in[j] = tmp_out; - } - } -} - -__device__ static void lotus_transform_password (u32x *in, u32x *out, u32 s_lotus_magic_table[256]) -{ - u32x t = out[3] >> 24; - - u32x c; - - #pragma unroll 4 - for (int i = 0; i < 4; i++) - { - t ^= (in[i] >> 0) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 0; t = ((out[i] >> 0) & 0xff); - t ^= (in[i] 
>> 8) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 8; t = ((out[i] >> 8) & 0xff); - t ^= (in[i] >> 16) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 16; t = ((out[i] >> 16) & 0xff); - t ^= (in[i] >> 24) & 0xff; c = BOX (s_lotus_magic_table, t); out[i] ^= c << 24; t = ((out[i] >> 24) & 0xff); - } -} - -__device__ static void pad (u32x w[4], const u32 len) -{ - const u32 val = 16 - len; - - const u32 mask1 = val << 24; - - const u32 mask2 = val << 16 - | val << 24; - - const u32 mask3 = val << 8 - | val << 16 - | val << 24; - - const u32 mask4 = val << 0 - | val << 8 - | val << 16 - | val << 24; - - switch (len) - { - case 0: w[0] = mask4; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 1: w[0] |= mask3; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 2: w[0] |= mask2; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 3: w[0] |= mask1; - w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 4: w[1] = mask4; - w[2] = mask4; - w[3] = mask4; - break; - case 5: w[1] |= mask3; - w[2] = mask4; - w[3] = mask4; - break; - case 6: w[1] |= mask2; - w[2] = mask4; - w[3] = mask4; - break; - case 7: w[1] |= mask1; - w[2] = mask4; - w[3] = mask4; - break; - case 8: w[2] = mask4; - w[3] = mask4; - break; - case 9: w[2] |= mask3; - w[3] = mask4; - break; - case 10: w[2] |= mask2; - w[3] = mask4; - break; - case 11: w[2] |= mask1; - w[3] = mask4; - break; - case 12: w[3] = mask4; - break; - case 13: w[3] |= mask3; - break; - case 14: w[3] |= mask2; - break; - case 15: w[3] |= mask1; - break; - } -} - -__device__ static void mdtransform_norecalc (u32x state[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - u32x x[12]; - - x[ 0] = state[0]; - x[ 1] = state[1]; - x[ 2] = state[2]; - x[ 3] = state[3]; - x[ 4] = block[0]; - x[ 5] = block[1]; - x[ 6] = block[2]; - x[ 7] = block[3]; - x[ 8] = state[0] ^ block[0]; - x[ 9] = state[1] ^ block[1]; - x[10] = state[2] ^ block[2]; - x[11] = state[3] ^ block[3]; - - 
lotus_mix (x, s_lotus_magic_table); - - state[0] = x[0]; - state[1] = x[1]; - state[2] = x[2]; - state[3] = x[3]; -} - -__device__ static void mdtransform (u32x state[4], u32x checksum[4], u32x block[4], u32 s_lotus_magic_table[256]) -{ - mdtransform_norecalc (state, block, s_lotus_magic_table); - - lotus_transform_password (block, checksum, s_lotus_magic_table); -} - -__device__ static void domino_big_md (const u32x saved_key[16], const u32 size, u32x state[4], u32 s_lotus_magic_table[256]) -{ - u32x checksum[4]; - - checksum[0] = 0; - checksum[1] = 0; - checksum[2] = 0; - checksum[3] = 0; - - u32x block[4]; - - block[0] = 0; - block[1] = 0; - block[2] = 0; - block[3] = 0; - - u32 curpos; - u32 idx; - - for (curpos = 0, idx = 0; curpos + 16 < size; curpos += 16, idx += 4) - { - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - } - - block[0] = saved_key[idx + 0]; - block[1] = saved_key[idx + 1]; - block[2] = saved_key[idx + 2]; - block[3] = saved_key[idx + 3]; - - mdtransform (state, checksum, block, s_lotus_magic_table); - - mdtransform_norecalc (state, checksum, s_lotus_magic_table); -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, 
w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP 
(SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 
0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -__device__ static void base64_encode (u8 *base64_hash, const u32 len, const u8 *base64_plain) -{ - u8 *out_ptr = (u8 *) base64_hash; - u8 *in_ptr = (u8 *) base64_plain; - - char *lotus64_table = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; - - u32 i; - - for (i = 0; i < len; i += 3) - { - char out_val0 = lotus64_table [ ((in_ptr[0] >> 2) & 0x3f)]; - char out_val1 = lotus64_table [((in_ptr[0] << 4) & 0x30) | ((in_ptr[1] >> 4) & 0x0f)]; - char out_val2 = lotus64_table [((in_ptr[1] << 2) & 0x3c) | ((in_ptr[2] >> 6) & 0x03)]; - char out_val3 = lotus64_table [ ((in_ptr[2] >> 0) & 0x3f)]; - - out_ptr[0] = out_val0 & 0x7f; - out_ptr[1] = out_val1 & 0x7f; - out_ptr[2] = out_val2 & 0x7f; - out_ptr[3] = out_val3 & 0x7f; - - in_ptr += 3; - out_ptr += 4; - } -} - -__device__ static void lotus6_base64_encode (u8 base64_hash[24], const u32 salt0, const u32 salt1, u32x a, u32x b, u32x c) -{ - u8 *salt0_ptr = (u8 *) &salt0; - u8 *salt1_ptr = (u8 *) &salt1; - - u8 *a_ptr = (u8 *) &a; - u8 *b_ptr = (u8 *) &b; - u8 *c_ptr = (u8 *) &c; - - /* - * Copy $salt.$digest to a tmp 
buffer - */ - - u8 base64_plain[16]; - - base64_plain[ 0] = salt0_ptr[0]; - base64_plain[ 1] = salt0_ptr[1]; - base64_plain[ 2] = salt0_ptr[2]; - base64_plain[ 3] = salt0_ptr[3]; - - base64_plain[3] -= -4; // dont ask! - - base64_plain[ 4] = salt1_ptr[0]; - - base64_plain[ 5] = a_ptr[0]; - base64_plain[ 6] = a_ptr[1]; - base64_plain[ 7] = a_ptr[2]; - base64_plain[ 8] = a_ptr[3]; - - base64_plain[ 9] = b_ptr[0]; - base64_plain[10] = b_ptr[1]; - base64_plain[11] = b_ptr[2]; - base64_plain[12] = b_ptr[3]; - - base64_plain[13] = c_ptr[0]; - base64_plain[14] = c_ptr[1]; - base64_plain[15] = c_ptr[2]; - - /* - * base64 encode the $salt.$digest string - */ - - base64_hash[ 0] = '('; - base64_hash[ 1] = 'G'; - - base64_encode (base64_hash + 2, 14, base64_plain); - - base64_hash[21] = ')'; -} - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09100_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lotus8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 
8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - /** - * sbox - */ - - __shared__ u32 s_lotus_magic_table[256]; - - s_lotus_magic_table[lid] = lotus_magic_table[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - /** - * pad - */ - - u32 pw_len = pws[gid].pw_len; - - if (pw_len < 16) - { - pad (&w[ 0], pw_len & 0xf); - } - else if (pw_len < 32) - { - pad (&w[ 4], pw_len & 0xf); - } - else if (pw_len < 48) - { - pad (&w[ 8], pw_len & 0xf); - } - else if (pw_len < 64) - { - pad (&w[12], pw_len & 0xf); - } - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = 0x01000000; - salt_buf1[1] = 0x00000080; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt0 = salt_buf0[0]; - const u32 salt1 = salt_buf0[1] & 0xff | '(' << 8; - - /** - * Lotus 6 hash - SEC_pwddigest_V2 - */ - - u32x w_tmp[16]; - - w_tmp[ 0] = w[ 0]; - w_tmp[ 1] = w[ 1]; - w_tmp[ 2] = w[ 2]; - w_tmp[ 3] = w[ 3]; - w_tmp[ 4] = w[ 4]; - w_tmp[ 5] = w[ 5]; - w_tmp[ 6] = w[ 6]; - w_tmp[ 7] = w[ 7]; - w_tmp[ 8] = w[ 8]; - w_tmp[ 9] = w[ 9]; - w_tmp[10] = w[10]; - 
w_tmp[11] = w[11]; - w_tmp[12] = w[12]; - w_tmp[13] = w[13]; - w_tmp[14] = w[14]; - w_tmp[15] = w[15]; - - u32x state[4]; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, pw_len, state, s_lotus_magic_table); - - const u32x w0_t = uint_to_hex_upper8 ((state[0] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 8) & 255) << 16; - const u32x w1_t = uint_to_hex_upper8 ((state[0] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[0] >> 24) & 255) << 16; - const u32x w2_t = uint_to_hex_upper8 ((state[1] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 8) & 255) << 16; - const u32x w3_t = uint_to_hex_upper8 ((state[1] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[1] >> 24) & 255) << 16; - const u32x w4_t = uint_to_hex_upper8 ((state[2] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 8) & 255) << 16; - const u32x w5_t = uint_to_hex_upper8 ((state[2] >> 16) & 255) << 0 - | uint_to_hex_upper8 ((state[2] >> 24) & 255) << 16; - const u32x w6_t = uint_to_hex_upper8 ((state[3] >> 0) & 255) << 0 - | uint_to_hex_upper8 ((state[3] >> 8) & 255) << 16; - - const u32 pade = 0x0e0e0e0e; - - w_tmp[ 0] = salt0; - w_tmp[ 1] = salt1 | w0_t << 16; - w_tmp[ 2] = w0_t >> 16 | w1_t << 16; - w_tmp[ 3] = w1_t >> 16 | w2_t << 16; - w_tmp[ 4] = w2_t >> 16 | w3_t << 16; - w_tmp[ 5] = w3_t >> 16 | w4_t << 16; - w_tmp[ 6] = w4_t >> 16 | w5_t << 16; - w_tmp[ 7] = w5_t >> 16 | w6_t << 16; - w_tmp[ 8] = w6_t >> 16 | pade << 16; - w_tmp[ 9] = pade; - w_tmp[10] = pade; - w_tmp[11] = pade; - w_tmp[12] = 0; - w_tmp[13] = 0; - w_tmp[14] = 0; - w_tmp[15] = 0; - - state[0] = 0; - state[1] = 0; - state[2] = 0; - state[3] = 0; - - domino_big_md (w_tmp, 34, state, s_lotus_magic_table); - - u32x a = state[0]; - u32x b = state[1]; - u32x c = state[2]; - - /** - * Base64 encode - */ - - pw_len = 22; - - u8 base64_hash[24]; // size 22 (=pw_len) is needed but base64 needs size divisible by 4 - - lotus6_base64_encode (base64_hash, salt_buf0[0], 
salt_buf0[1], a, b, c); - - - /** - * PBKDF2 - HMACSHA1 - 1st iteration - */ - - u32x w0[4]; - - w0[0] = (base64_hash[ 0] << 24) | (base64_hash[ 1] << 16) | (base64_hash[ 2] << 8) | base64_hash[ 3]; - w0[1] = (base64_hash[ 4] << 24) | (base64_hash[ 5] << 16) | (base64_hash[ 6] << 8) | base64_hash[ 7]; - w0[2] = (base64_hash[ 8] << 24) | (base64_hash[ 9] << 16) | (base64_hash[10] << 8) | base64_hash[11]; - w0[3] = (base64_hash[12] << 24) | (base64_hash[13] << 16) | (base64_hash[14] << 8) | base64_hash[15]; - - u32x w1[4]; - - w1[0] = (base64_hash[16] << 24) | (base64_hash[17] << 16) | (base64_hash[18] << 8) | base64_hash[19]; - w1[1] = (base64_hash[20] << 24) | (base64_hash[21] << 16); - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - /** - * pads - */ - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - w0[0] = salt_buf0[0]; - w0[1] = salt_buf0[1]; - w0[2] = salt_buf0[2]; - w0[3] = salt_buf0[3]; - w1[0] = salt_buf1[0]; - w1[1] = salt_buf1[1]; - w1[2] = salt_buf1[2]; - w1[3] = salt_buf1[3]; - w2[0] = salt_buf2[0]; - w2[1] = salt_buf2[1]; - w2[2] = salt_buf2[2]; - w2[3] = salt_buf2[3]; - w3[0] = salt_buf3[0]; - w3[1] = salt_buf3[1]; - w3[2] = salt_buf3[2]; - //w3[3] = salt_buf3[3]; - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = 
swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09100_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lotus8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - u32x dgst[5]; - u32x out[5]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = 
tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - out[4] = tmps[gid].out[4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; - tmps[gid].out[4] = out[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09100_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, lotus8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) 
return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m09400.cu b/nv/m09400.cu deleted file mode 100644 index c0afabb..0000000 --- a/nv/m09400.cu +++ /dev/null @@ -1,1844 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OFFICE2007_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 
0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 
0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 
0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 
0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 
0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 
0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 
te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 
0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 
0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 
0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 
0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 
0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 
0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 
0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 
0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 
0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - - #pragma unroll 10 - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - 
temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 10; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 
s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ 
s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - - out[0] 
= (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[40]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[41]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[42]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[43]; -} - -__device__ static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ 
s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 
16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[40]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[41]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[42]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[43]; -} - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - 
rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - while (1) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) break; - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - 
s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 
>> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 
24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 
= s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ 
s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 
16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47]; - s0 = s_te0[t0 >> 24] 
^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[56]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[57]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[58]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[59]; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = 
w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, 
C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, 
C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, 
B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2007_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2007_t *office2007_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - 
const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - make_unicode (w1, w2, w3); - make_unicode (w0, w0, w1); - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * init - */ - - u32x t0[4]; - - t0[0] = salt_buf[0]; - t0[1] = salt_buf[1]; - t0[2] = salt_buf[2]; - t0[3] = salt_buf[3]; - - u32x t1[4]; - - t1[0] = swap_workaround (w0[0]); - t1[1] = swap_workaround (w0[1]); - t1[2] = swap_workaround (w0[2]); - t1[3] = swap_workaround (w0[3]); - - u32x t2[4]; - - t2[0] = swap_workaround (w1[0]); - t2[1] = swap_workaround (w1[1]); - t2[2] = swap_workaround (w1[2]); - t2[3] = swap_workaround (w1[3]); - - u32x t3[4]; - - t3[0] = swap_workaround (w2[0]); - t3[1] = swap_workaround (w2[1]); - t3[2] = 0; - t3[3] = (salt_len + (pw_len * 2)) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (t0, t1, t2, t3, digest); - - tmps[gid].out[0] = digest[0]; - tmps[gid].out[1] = digest[1]; - tmps[gid].out[2] = digest[2]; - tmps[gid].out[3] = digest[3]; - tmps[gid].out[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m09400_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2007_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2007_t *office2007_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = 0; - w0[1] = tmps[gid].out[0]; - w0[2] = tmps[gid].out[1]; - w0[3] = tmps[gid].out[2]; - - u32x w1[4]; - - w1[0] = tmps[gid].out[3]; - w1[1] = tmps[gid].out[4]; - w1[2] = 0x80000000; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (4 + 20) * 8; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - w0[0] = swap_workaround (j); - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[1] = digest[0]; - w0[2] = digest[1]; - w0[3] = digest[2]; - w1[0] = digest[3]; - w1[1] = digest[4]; - } - - tmps[gid].out[0] = w0[1]; - tmps[gid].out[1] = w0[2]; - tmps[gid].out[2] = w0[3]; - tmps[gid].out[3] = w1[0]; - tmps[gid].out[4] = w1[1]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2007_tmp_t *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2007_t *office2007_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = tmps[gid].out[3]; - - u32x w1[4]; - - w1[0] = tmps[gid].out[4]; - w1[1] = 0; - w1[2] = 0x80000000; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (20 + 4) * 8; - - u32 digest_common[5]; - - digest_common[0] = SHA1M_A; - digest_common[1] = SHA1M_B; - digest_common[2] = SHA1M_C; - digest_common[3] = SHA1M_D; - digest_common[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest_common); - - w0[0] = 0x36363636 ^ 
digest_common[0]; - w0[1] = 0x36363636 ^ digest_common[1]; - w0[2] = 0x36363636 ^ digest_common[2]; - w0[3] = 0x36363636 ^ digest_common[3]; - w1[0] = 0x36363636 ^ digest_common[4]; - w1[1] = 0x36363636; - w1[2] = 0x36363636; - w1[3] = 0x36363636; - w2[0] = 0x36363636; - w2[1] = 0x36363636; - w2[2] = 0x36363636; - w2[3] = 0x36363636; - w3[0] = 0x36363636; - w3[1] = 0x36363636; - w3[2] = 0x36363636; - w3[3] = 0x36363636; - - u32 digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = 0x80000000; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 64 * 8; - - sha1_transform (w0, w1, w2, w3, digest); - - // save digest for the AES-256 variant - - u32 digest_saved[5]; - - digest_saved[0] = digest[0]; - digest_saved[1] = digest[1]; - digest_saved[2] = digest[2]; - digest_saved[3] = digest[3]; - digest_saved[4] = digest[4]; - - // now we got the AES key, decrypt the verifier - - u32x rek[60]; - u32x rdk[60]; - - u32 verifier[4]; - - verifier[0] = office2007_bufs[salt_pos].encryptedVerifier[0]; - verifier[1] = office2007_bufs[salt_pos].encryptedVerifier[1]; - verifier[2] = office2007_bufs[salt_pos].encryptedVerifier[2]; - verifier[3] = office2007_bufs[salt_pos].encryptedVerifier[3]; - - u32 data[8]; - - data[0] = verifier[0]; - data[1] = verifier[1]; - data[2] = verifier[2]; - data[3] = verifier[3]; - - data[4] = 0; - data[5] = 0; - data[6] = 0; - data[7] = 0; - - u32x ukeyx[8]; - - ukeyx[0] = digest[0]; - ukeyx[1] = digest[1]; - ukeyx[2] = digest[2]; - ukeyx[3] = digest[3]; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 44; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - 
AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - // do a sha1 of the result - - w0[0] = out[0]; - w0[1] = out[1]; - w0[2] = out[2]; - w0[3] = out[3]; - w1[0] = 0x80000000; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - // encrypt it again for verify - - data[0] = digest[0]; - data[1] = digest[1]; - data[2] = digest[2]; - data[3] = digest[3]; - - AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - { - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M - } - - /* - * AES-256 test - */ - - // try same procedure but with AES-256 - - w0[0] = 0x5c5c5c5c ^ digest_common[0]; - w0[1] = 0x5c5c5c5c ^ digest_common[1]; - w0[2] = 0x5c5c5c5c ^ digest_common[2]; - w0[3] = 0x5c5c5c5c ^ digest_common[3]; - w1[0] = 0x5c5c5c5c ^ digest_common[4]; - w1[1] = 0x5c5c5c5c; - w1[2] = 0x5c5c5c5c; - w1[3] = 0x5c5c5c5c; - w2[0] = 0x5c5c5c5c; - w2[1] = 0x5c5c5c5c; - w2[2] = 0x5c5c5c5c; - w2[3] = 0x5c5c5c5c; - w3[0] = 0x5c5c5c5c; - w3[1] = 0x5c5c5c5c; - w3[2] = 0x5c5c5c5c; - w3[3] = 0x5c5c5c5c; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = 0x80000000; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 64 * 8; - - sha1_transform (w0, w1, w2, w3, digest); - - // now we got the AES key, decrypt the verifier - - ukeyx[0] = digest_saved[0]; - ukeyx[1] = digest_saved[1]; - ukeyx[2] = digest_saved[2]; - ukeyx[3] = digest_saved[3]; - 
ukeyx[4] = digest_saved[4]; - ukeyx[5] = digest[0]; - ukeyx[6] = digest[1]; - ukeyx[7] = digest[2]; - - AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 60; i++) rdk[i] = rek[i]; - - AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - data[0] = verifier[0]; - data[1] = verifier[1]; - data[2] = verifier[2]; - data[3] = verifier[3]; - - AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - // do a sha1 of the result - - w0[0] = out[0]; - w0[1] = out[1]; - w0[2] = out[2]; - w0[3] = out[3]; - w1[0] = 0x80000000; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - // encrypt it again for verify - - data[0] = digest[0]; - data[1] = digest[1]; - data[2] = digest[2]; - data[3] = digest[3]; - - AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - { - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M - } -} diff --git a/nv/m09500.cu b/nv/m09500.cu deleted file mode 100644 index 30c0fdd..0000000 --- a/nv/m09500.cu +++ /dev/null @@ -1,1421 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OFFICE2010_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M 
"check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 
0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 
0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 
0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 
0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 
0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 
0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 
0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 
0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 
0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 
0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 
0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 
0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 
0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 
0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 
0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 
0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - - #pragma unroll 10 - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; 
i < 10; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 
>> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 
24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[40]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[41]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[42]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[43]; -} - -__device__ static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 
s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ 
s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - - out[0] = (s_te4[(t0 >> 24) & 
0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[40]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[41]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[42]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[43]; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); 
- SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = 
rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - 
w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = 
rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2010_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2010_t *office2010_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - make_unicode (w1, w2, w3); - make_unicode (w0, w0, w1); - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[4]; - - salt_buf[0] = 
salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * init - */ - - u32x t0[4]; - - t0[0] = salt_buf[0]; - t0[1] = salt_buf[1]; - t0[2] = salt_buf[2]; - t0[3] = salt_buf[3]; - - u32x t1[4]; - - t1[0] = swap_workaround (w0[0]); - t1[1] = swap_workaround (w0[1]); - t1[2] = swap_workaround (w0[2]); - t1[3] = swap_workaround (w0[3]); - - u32x t2[4]; - - t2[0] = swap_workaround (w1[0]); - t2[1] = swap_workaround (w1[1]); - t2[2] = swap_workaround (w1[2]); - t2[3] = swap_workaround (w1[3]); - - u32x t3[4]; - - t3[0] = swap_workaround (w2[0]); - t3[1] = swap_workaround (w2[1]); - t3[2] = 0; - t3[3] = (salt_len + (pw_len * 2)) * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (t0, t1, t2, t3, digest); - - tmps[gid].out[0] = digest[0]; - tmps[gid].out[1] = digest[1]; - tmps[gid].out[2] = digest[2]; - tmps[gid].out[3] = digest[3]; - tmps[gid].out[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2010_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2010_t *office2010_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * 
blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = 0; - w0[1] = tmps[gid].out[0]; - w0[2] = tmps[gid].out[1]; - w0[3] = tmps[gid].out[2]; - - u32x w1[4]; - - w1[0] = tmps[gid].out[3]; - w1[1] = tmps[gid].out[4]; - w1[2] = 0x80000000; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (4 + 20) * 8; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - w0[0] = swap_workaround (j); - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[1] = digest[0]; - w0[2] = digest[1]; - w0[3] = digest[2]; - w1[0] = digest[3]; - w1[1] = digest[4]; - } - - tmps[gid].out[0] = w0[1]; - tmps[gid].out[1] = w0[2]; - tmps[gid].out[2] = w0[3]; - tmps[gid].out[3] = w1[0]; - tmps[gid].out[4] = w1[1]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2010_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2010_t *office2010_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - 
__shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 }; - u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = tmps[gid].out[3]; - w1[0] = tmps[gid].out[4]; - w1[1] = encryptedVerifierHashInputBlockKey[0]; - w1[2] = encryptedVerifierHashInputBlockKey[1]; - w1[3] = 0x80000000; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (20 + 8) * 8; - - u32 digest0[5]; - - digest0[0] = SHA1M_A; - digest0[1] = SHA1M_B; - digest0[2] = SHA1M_C; - digest0[3] = SHA1M_D; - digest0[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest0); - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = tmps[gid].out[3]; - w1[0] = tmps[gid].out[4]; - w1[1] = encryptedVerifierHashValueBlockKey[0]; - w1[2] = encryptedVerifierHashValueBlockKey[1]; - w1[3] = 0x80000000; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (20 + 8) * 8; - - u32 digest1[5]; - - digest1[0] = SHA1M_A; - digest1[1] = SHA1M_B; - digest1[2] = SHA1M_C; - digest1[3] = SHA1M_D; - digest1[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest1); - - // now we got the AES key, decrypt the verifier - - u32x rek[60]; - u32x rdk[60]; - - u32 data[4]; - - data[0] = 
office2010_bufs[salt_pos].encryptedVerifier[0]; - data[1] = office2010_bufs[salt_pos].encryptedVerifier[1]; - data[2] = office2010_bufs[salt_pos].encryptedVerifier[2]; - data[3] = office2010_bufs[salt_pos].encryptedVerifier[3]; - - u32x ukeyx[4]; - - ukeyx[0] = digest0[0]; - ukeyx[1] = digest0[1]; - ukeyx[2] = digest0[2]; - ukeyx[3] = digest0[3]; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 44; i++) rdk[i] = rek[i]; - - AES128_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - AES128_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= salt_bufs[salt_pos].salt_buf[0]; - out[1] ^= salt_bufs[salt_pos].salt_buf[1]; - out[2] ^= salt_bufs[salt_pos].salt_buf[2]; - out[3] ^= salt_bufs[salt_pos].salt_buf[3]; - - // do a sha1 of the result - - w0[0] = out[0]; - w0[1] = out[1]; - w0[2] = out[2]; - w0[3] = out[3]; - w1[0] = 0x80000000; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - // encrypt it again for verify - - ukeyx[0] = digest1[0]; - ukeyx[1] = digest1[1]; - ukeyx[2] = digest1[2]; - ukeyx[3] = digest1[3]; - - AES128_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - data[0] = digest[0] ^ salt_bufs[salt_pos].salt_buf[0]; - data[1] = digest[1] ^ salt_bufs[salt_pos].salt_buf[1]; - data[2] = digest[2] ^ salt_bufs[salt_pos].salt_buf[2]; - data[3] = digest[3] ^ salt_bufs[salt_pos].salt_buf[3]; - - AES128_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m09600.cu 
b/nv/m09600.cu deleted file mode 100644 index 648062c..0000000 --- a/nv/m09600.cu +++ /dev/null @@ -1,1490 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OFFICE2013_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 
0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 
0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 
0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 
0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 
0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 
0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 
0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 
0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 
0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 
0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 
0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 
0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 
0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 
0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 
0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 
0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - while (1) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ 
rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) break; - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 
0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 
0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ 
s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ 
(s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ static void AES256_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 s0 = in[0] ^ rek[0]; - u32 s1 = in[1] ^ rek[1]; - u32 s2 = in[2] ^ rek[2]; - u32 s3 = in[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ 
s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 
>> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[40]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[41]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[42]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[43]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[44]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[45]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[46]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[47]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[48]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[49]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[50]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 
0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[51]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[52]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[53]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[54]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[55]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[56]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[57]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[58]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[59]; -} - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, 
SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -__device__ static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w0[0]; - u64 w1_t = w0[1]; - u64 w2_t = w0[2]; - u64 w3_t = w0[3]; - u64 w4_t = w1[0]; - u64 w5_t = w1[1]; - u64 w6_t = w1[2]; - u64 w7_t = w1[3]; - u64 w8_t = w2[0]; - u64 w9_t = w2[1]; - u64 wa_t = w2[2]; - u64 wb_t = w2[3]; - u64 wc_t = w3[0]; - u64 wd_t = w3[1]; - u64 we_t = w3[2]; - u64 wf_t = w3[3]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP 
(SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09600_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2013_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2013_t *office2013_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 
loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - make_unicode (w1, w2, w3); - make_unicode (w0, w0, w1); - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * init - */ - - u64x t0[4]; - - t0[0] = (u64) salt_buf[0] << 32 | salt_buf[1]; - t0[1] = (u64) salt_buf[2] << 32 | salt_buf[3]; - t0[2] = (u64) swap_workaround (w0[0]) << 32 | swap_workaround (w0[1]); - t0[3] = (u64) swap_workaround (w0[2]) << 32 | swap_workaround (w0[3]); - - u64x t1[4]; - - t1[0] = (u64) swap_workaround (w1[0]) << 32 | swap_workaround (w1[1]); - t1[1] = (u64) swap_workaround (w1[2]) << 32 | swap_workaround (w1[3]); - t1[2] = (u64) swap_workaround (w2[0]) << 32 | swap_workaround (w2[1]); - t1[3] = (u64) swap_workaround (w2[2]) << 32 | swap_workaround (w2[3]); - - u64x t2[4]; - - t2[0] = (u64) swap_workaround (w3[0]) << 32 | swap_workaround (w3[1]); - t2[1] = (u64) swap_workaround (w3[2]) << 32 | swap_workaround (w3[3]); - t2[2] = 0; - t2[3] = 0; - - u64x t3[4]; - - t3[0] = 0; - t3[1] = 
0; - t3[2] = 0; - t3[3] = (salt_len + (pw_len * 2)) * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (t0, t1, t2, t3, digest); - - tmps[gid].out[0] = digest[0]; - tmps[gid].out[1] = digest[1]; - tmps[gid].out[2] = digest[2]; - tmps[gid].out[3] = digest[3]; - tmps[gid].out[4] = digest[4]; - tmps[gid].out[5] = digest[5]; - tmps[gid].out[6] = digest[6]; - tmps[gid].out[7] = digest[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09600_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2013_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2013_t *office2013_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64x w0[4]; - - w0[0] = tmps[gid].out[0] >> 32; - w0[1] = tmps[gid].out[0] << 32 | tmps[gid].out[1] >> 32; - w0[2] = tmps[gid].out[1] << 32 | tmps[gid].out[2] >> 32; - w0[3] = tmps[gid].out[2] << 32 | tmps[gid].out[3] >> 32; - - u64x w1[4]; - - w1[0] = tmps[gid].out[3] << 32 | tmps[gid].out[4] >> 32; - w1[1] = tmps[gid].out[4] << 32 | tmps[gid].out[5] >> 32; - w1[2] = tmps[gid].out[5] << 32 | tmps[gid].out[6] >> 32; - w1[3] = tmps[gid].out[6] << 32 | 
tmps[gid].out[7] >> 32; - - u64x w2[4]; - - w2[0] = tmps[gid].out[7] << 32 | 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u64x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (4 + 64) * 8; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - w0[0] = (u64) swap_workaround (j) << 32 | w0[0] & 0xffffffff; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0] >> 32; - w0[1] = digest[0] << 32 | digest[1] >> 32; - w0[2] = digest[1] << 32 | digest[2] >> 32; - w0[3] = digest[2] << 32 | digest[3] >> 32; - w1[0] = digest[3] << 32 | digest[4] >> 32; - w1[1] = digest[4] << 32 | digest[5] >> 32; - w1[2] = digest[5] << 32 | digest[6] >> 32; - w1[3] = digest[6] << 32 | digest[7] >> 32; - w2[0] = digest[7] << 32 | 0x80000000; - } - - tmps[gid].out[0] = w0[0] << 32 | w0[1] >> 32; - tmps[gid].out[1] = w0[1] << 32 | w0[2] >> 32; - tmps[gid].out[2] = w0[2] << 32 | w0[3] >> 32; - tmps[gid].out[3] = w0[3] << 32 | w1[0] >> 32; - tmps[gid].out[4] = w1[0] << 32 | w1[1] >> 32; - tmps[gid].out[5] = w1[1] << 32 | w1[2] >> 32; - tmps[gid].out[6] = w1[2] << 32 | w1[3] >> 32; - tmps[gid].out[7] = w1[3] << 32 | w2[0] >> 32; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09600_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, office2013_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const office2013_t *office2013_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - u32x encryptedVerifierHashInputBlockKey[2] = { 0xfea7d276, 0x3b4b9e79 }; - u32x encryptedVerifierHashValueBlockKey[2] = { 0xd7aa0f6d, 0x3061344e }; - - u64x w0[4]; - u64x w1[4]; - u64x w2[4]; - u64x w3[4]; - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = tmps[gid].out[3]; - w1[0] = tmps[gid].out[4]; - w1[1] = tmps[gid].out[5]; - w1[2] = tmps[gid].out[6]; - w1[3] = tmps[gid].out[7]; - w2[0] = hl32_to_64 (encryptedVerifierHashInputBlockKey[0], encryptedVerifierHashInputBlockKey[1]); - w2[1] = 0x8000000000000000; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 8) * 8; - - u64x digest0[8]; - - digest0[0] = SHA512M_A; - digest0[1] = SHA512M_B; - digest0[2] = SHA512M_C; - digest0[3] = SHA512M_D; - digest0[4] = SHA512M_E; - digest0[5] = SHA512M_F; - digest0[6] = SHA512M_G; - digest0[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, digest0); - - w0[0] = tmps[gid].out[0]; - w0[1] = tmps[gid].out[1]; - w0[2] = tmps[gid].out[2]; - w0[3] = 
tmps[gid].out[3]; - w1[0] = tmps[gid].out[4]; - w1[1] = tmps[gid].out[5]; - w1[2] = tmps[gid].out[6]; - w1[3] = tmps[gid].out[7]; - w2[0] = hl32_to_64 (encryptedVerifierHashValueBlockKey[0], encryptedVerifierHashValueBlockKey[1]); - w2[1] = 0x8000000000000000; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 8) * 8; - - u64x digest1[8]; - - digest1[0] = SHA512M_A; - digest1[1] = SHA512M_B; - digest1[2] = SHA512M_C; - digest1[3] = SHA512M_D; - digest1[4] = SHA512M_E; - digest1[5] = SHA512M_F; - digest1[6] = SHA512M_G; - digest1[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, digest1); - - // now we got the AES key, decrypt the verifier - - u32x rek[60]; - u32x rdk[60]; - - u32 data[4]; - - data[0] = office2013_bufs[salt_pos].encryptedVerifier[0]; - data[1] = office2013_bufs[salt_pos].encryptedVerifier[1]; - data[2] = office2013_bufs[salt_pos].encryptedVerifier[2]; - data[3] = office2013_bufs[salt_pos].encryptedVerifier[3]; - - u32x ukeyx[8]; - - ukeyx[0] = h32_from_64 (digest0[0]); - ukeyx[1] = l32_from_64 (digest0[0]); - ukeyx[2] = h32_from_64 (digest0[1]); - ukeyx[3] = l32_from_64 (digest0[1]); - ukeyx[4] = h32_from_64 (digest0[2]); - ukeyx[5] = l32_from_64 (digest0[2]); - ukeyx[6] = h32_from_64 (digest0[3]); - ukeyx[7] = l32_from_64 (digest0[3]); - - AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - for (u32 i = 0; i < 60; i++) rdk[i] = rek[i]; - - AES256_InvertKey (rdk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - AES256_decrypt (data, out, rdk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= salt_bufs[salt_pos].salt_buf[0]; - out[1] ^= salt_bufs[salt_pos].salt_buf[1]; - out[2] ^= salt_bufs[salt_pos].salt_buf[2]; - out[3] ^= salt_bufs[salt_pos].salt_buf[3]; - - // final sha512 - - w0[0] = hl32_to_64 (out[0], out[1]); - w0[1] = hl32_to_64 (out[2], out[3]); - w0[2] = 0x8000000000000000; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 
0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 16 * 8; - - u64x digest[8]; - - digest[0] = SHA512M_A; - digest[1] = SHA512M_B; - digest[2] = SHA512M_C; - digest[3] = SHA512M_D; - digest[4] = SHA512M_E; - digest[5] = SHA512M_F; - digest[6] = SHA512M_G; - digest[7] = SHA512M_H; - - sha512_transform (w0, w1, w2, w3, digest); - - // encrypt with 2nd key - - ukeyx[0] = h32_from_64 (digest1[0]); - ukeyx[1] = l32_from_64 (digest1[0]); - ukeyx[2] = h32_from_64 (digest1[1]); - ukeyx[3] = l32_from_64 (digest1[1]); - ukeyx[4] = h32_from_64 (digest1[2]); - ukeyx[5] = l32_from_64 (digest1[2]); - ukeyx[6] = h32_from_64 (digest1[3]); - ukeyx[7] = l32_from_64 (digest1[3]); - - AES256_ExpandKey (ukeyx, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - data[0] = h32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[0]; - data[1] = l32_from_64 (digest[0]) ^ salt_bufs[salt_pos].salt_buf[1]; - data[2] = h32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[2]; - data[3] = l32_from_64 (digest[1]) ^ salt_bufs[salt_pos].salt_buf[3]; - - AES256_encrypt (data, out, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m09700_a0.cu b/nv/m09700_a0.cu deleted file mode 100644 index 6cc738f..0000000 --- a/nv/m09700_a0.cu +++ /dev/null @@ -1,1047 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap 
(rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, 
MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) -{ - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = digest_pre[0]; - digest_t0[1] = digest_pre[1] & 0xff; - - digest_t1[0] = digest_pre[0] << 8; - digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8; - - digest_t2[0] = digest_pre[0] << 16; - digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16; - - digest_t3[0] = digest_pre[0] << 24; - digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24; - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_buf[0]; - salt_buf_t0[1] = salt_buf[1]; - salt_buf_t0[2] = salt_buf[2]; - salt_buf_t0[3] = salt_buf[3]; - - salt_buf_t1[0] = salt_buf[0] << 8; - salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8; - salt_buf_t1[2] = salt_buf[1] >> 24 | salt_buf[2] << 8; - salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8; - salt_buf_t1[4] = salt_buf[3] >> 24; - - salt_buf_t2[0] = salt_buf[0] 
<< 16; - salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16; - salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16; - salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16; - salt_buf_t2[4] = salt_buf[3] >> 16; - - salt_buf_t3[0] = salt_buf[0] << 24; - salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24; - salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24; - salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; - salt_buf_t3[4] = salt_buf[3] >> 8; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - 
salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! 
- - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 
2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! - - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09700_a1.cu b/nv/m09700_a1.cu deleted file mode 100644 index 3b4490e..0000000 --- a/nv/m09700_a1.cu +++ /dev/null @@ -1,1165 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - 
-#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j 
+= rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, 
MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - 
MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) -{ - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = digest_pre[0]; - digest_t0[1] = digest_pre[1] & 0xff; - - digest_t1[0] = digest_pre[0] << 8; - digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8; - - digest_t2[0] = digest_pre[0] << 16; - digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16; - - digest_t3[0] = digest_pre[0] << 24; - digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24; - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_buf[0]; - salt_buf_t0[1] = salt_buf[1]; - salt_buf_t0[2] = salt_buf[2]; - salt_buf_t0[3] = salt_buf[3]; - - salt_buf_t1[0] = salt_buf[0] << 8; - salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8; - salt_buf_t1[2] 
= salt_buf[1] >> 24 | salt_buf[2] << 8; - salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8; - salt_buf_t1[4] = salt_buf[3] >> 24; - - salt_buf_t2[0] = salt_buf[0] << 16; - salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16; - salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16; - salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16; - salt_buf_t2[4] = salt_buf[3] >> 16; - - salt_buf_t3[0] = salt_buf[0] << 24; - salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24; - salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24; - salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; - salt_buf_t3[4] = salt_buf[3] >> 8; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; 
- - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = 
wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! 
- - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - 
wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! 
- - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09700_a3.cu b/nv/m09700_a3.cu deleted file mode 100644 index e095b04..0000000 --- a/nv/m09700_a3.cu +++ /dev/null @@ -1,1552 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - 
rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = 
rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09700m (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * salt - */ - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf_t0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf_t0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf_t0[3] = salt_bufs[salt_pos].salt_buf[3]; - - salt_buf_t1[0] = salt_buf_t0[0] << 8; - salt_buf_t1[1] = salt_buf_t0[0] >> 24 | salt_buf_t0[1] << 8; - salt_buf_t1[2] = salt_buf_t0[1] >> 24 | salt_buf_t0[2] << 8; - salt_buf_t1[3] = salt_buf_t0[2] >> 24 | 
salt_buf_t0[3] << 8; - salt_buf_t1[4] = salt_buf_t0[3] >> 24; - - salt_buf_t2[0] = salt_buf_t0[0] << 16; - salt_buf_t2[1] = salt_buf_t0[0] >> 16 | salt_buf_t0[1] << 16; - salt_buf_t2[2] = salt_buf_t0[1] >> 16 | salt_buf_t0[2] << 16; - salt_buf_t2[3] = salt_buf_t0[2] >> 16 | salt_buf_t0[3] << 16; - salt_buf_t2[4] = salt_buf_t0[3] >> 16; - - salt_buf_t3[0] = salt_buf_t0[0] << 24; - salt_buf_t3[1] = salt_buf_t0[0] >> 8 | salt_buf_t0[1] << 24; - salt_buf_t3[2] = salt_buf_t0[1] >> 8 | salt_buf_t0[2] << 24; - salt_buf_t3[3] = salt_buf_t0[2] >> 8 | salt_buf_t0[3] << 24; - salt_buf_t3[4] = salt_buf_t0[3] >> 8; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = pw_len * 8; - w3_t[3] = 0; - - u32x digest_t0[4]; - u32x digest_t1[2]; // need only first 5 byte - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = MD5M_A; - digest_t0[1] = MD5M_B; - digest_t0[2] = MD5M_C; - digest_t0[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_t0); - - // prepare 16 * 21 buffer stuff - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - 
- // offsets - - digest_t0[0] &= 0xffffffff; - digest_t0[1] &= 0x000000ff; - digest_t0[2] &= 0x00000000; - digest_t0[3] &= 0x00000000; - - digest_t1[0] = digest_t0[0] << 8; - digest_t1[1] = digest_t0[0] >> 24 | digest_t0[1] << 8; - - digest_t2[0] = digest_t0[0] << 16; - digest_t2[1] = digest_t0[0] >> 16 | digest_t0[1] << 16; - - digest_t3[0] = digest_t0[0] << 24; - digest_t3[1] = digest_t0[0] >> 8 | digest_t0[1] << 24; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! - - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09700s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const 
comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf_t0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf_t0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf_t0[3] = salt_bufs[salt_pos].salt_buf[3]; - - salt_buf_t1[0] = salt_buf_t0[0] << 8; - salt_buf_t1[1] = salt_buf_t0[0] >> 24 | salt_buf_t0[1] << 8; - salt_buf_t1[2] = salt_buf_t0[1] >> 24 | salt_buf_t0[2] << 8; - salt_buf_t1[3] = salt_buf_t0[2] >> 24 | salt_buf_t0[3] << 8; - salt_buf_t1[4] = salt_buf_t0[3] >> 24; - - salt_buf_t2[0] = salt_buf_t0[0] << 16; - salt_buf_t2[1] = salt_buf_t0[0] >> 16 | salt_buf_t0[1] << 16; - salt_buf_t2[2] = salt_buf_t0[1] >> 16 | salt_buf_t0[2] << 16; - salt_buf_t2[3] = salt_buf_t0[2] >> 16 | salt_buf_t0[3] << 16; - salt_buf_t2[4] = salt_buf_t0[3] >> 16; - - salt_buf_t3[0] = salt_buf_t0[0] << 
24; - salt_buf_t3[1] = salt_buf_t0[0] >> 8 | salt_buf_t0[1] << 24; - salt_buf_t3[2] = salt_buf_t0[1] >> 8 | salt_buf_t0[2] << 24; - salt_buf_t3[3] = salt_buf_t0[2] >> 8 | salt_buf_t0[3] << 24; - salt_buf_t3[4] = salt_buf_t0[3] >> 8; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = pw_len * 8; - w3_t[3] = 0; - - u32x digest_t0[4]; - u32x digest_t1[2]; // need only first 5 byte - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = MD5M_A; - digest_t0[1] = MD5M_B; - digest_t0[2] = MD5M_C; - digest_t0[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_t0); - - // prepare 16 * 21 buffer stuff - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - // offsets - - digest_t0[0] &= 0xffffffff; - digest_t0[1] &= 0x000000ff; - digest_t0[2] &= 0x00000000; - digest_t0[3] &= 0x00000000; - - digest_t1[0] = digest_t0[0] << 8; - digest_t1[1] = digest_t0[0] >> 24 | digest_t0[1] << 8; - - digest_t2[0] = digest_t0[0] << 16; - digest_t2[1] = digest_t0[0] >> 16 | digest_t0[1] << 16; - - digest_t3[0] = digest_t0[0] << 24; - 
digest_t3[1] = digest_t0[0] >> 8 | digest_t0[1] << 24; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. - - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. 
- w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. - w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. 
- w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. - w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the 40 bit input for the MD5 which then will generate the RC4 key, so it's precomputable! 
- - w0_t[0] = digest[0]; - w0_t[1] = digest[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + 
threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - 
w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = 
pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; 
- w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] 
= pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09700s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m09710_a0.cu b/nv/m09710_a0.cu deleted file mode 100644 index d8cb3ee..0000000 --- a/nv/m09710_a0.cu +++ /dev/null @@ -1,625 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - 
- u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + 
rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, 
a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, 
w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = 
oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - // first md5 to generate RC4 128 bit key - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = 
out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = 
pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - // first md5 to generate RC4 128 bit key - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 
*bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09710_a1.cu b/nv/m09710_a1.cu deleted file mode 100644 index b3177a1..0000000 --- a/nv/m09710_a1.cu +++ /dev/null @@ -1,667 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - 
-#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + 
(v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, 
d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, 
MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ 
RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - // first md5 to generate RC4 128 bit key - - u32x 
w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wordl0[0] | wordr0[0]; - w0_t[1] = (wordl0[1] | wordr0[1]) & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - 
- RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] 
= 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - // first md5 to generate RC4 128 bit key - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = wordl0[0] | wordr0[0]; - w0_t[1] = (wordl0[1] | wordr0[1]) & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09710_a3.cu b/nv/m09710_a3.cu deleted file mode 100644 index 6480bf5..0000000 --- a/nv/m09710_a3.cu +++ /dev/null @@ -1,618 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef 
VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; 
- - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, 
c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, 
w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09710m (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - 
encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - // first md5 to generate RC4 128 bit key - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09710s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void 
*hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice01_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice01_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice01_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice01_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice01_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - // first md5 to generate RC4 128 bit key - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1] & 0xff; - w0_t[2] = 0x8000; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 9 * 8; - w3_t[3] = 0; - - u32x digest[4]; 
- - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1]; - key[2] = digest[2]; - key[3] = digest[3]; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = out[0]; - w0_t[1] = out[1]; - w0_t[2] = out[2]; - w0_t[3] = out[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 
1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09710m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09710s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09710_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} diff --git a/nv/m09720_a0.cu b/nv/m09720_a0.cu deleted file mode 100644 index 7ba1648..0000000 --- a/nv/m09720_a0.cu +++ /dev/null @@ -1,762 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, 
MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - 
MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) -{ - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = digest_pre[0]; - digest_t0[1] = digest_pre[1] & 0xff; - - digest_t1[0] = digest_pre[0] << 8; - digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8; - - digest_t2[0] = digest_pre[0] << 16; - digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16; - - digest_t3[0] = digest_pre[0] << 24; - digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24; - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_buf[0]; - salt_buf_t0[1] = salt_buf[1]; - salt_buf_t0[2] = salt_buf[2]; - salt_buf_t0[3] = salt_buf[3]; - - salt_buf_t1[0] = salt_buf[0] << 8; - salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8; - salt_buf_t1[2] = salt_buf[1] >> 24 | salt_buf[2] << 8; - 
salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8; - salt_buf_t1[4] = salt_buf[3] >> 24; - - salt_buf_t2[0] = salt_buf[0] << 16; - salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16; - salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16; - salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16; - salt_buf_t2[4] = salt_buf[3] >> 16; - - salt_buf_t3[0] = salt_buf[0] << 24; - salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24; - salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24; - salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; - salt_buf_t3[4] = salt_buf[3] >> 8; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = 
salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = out_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - 
digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09720_a1.cu b/nv/m09720_a1.cu deleted file mode 100644 index 157a7eb..0000000 --- a/nv/m09720_a1.cu +++ /dev/null @@ -1,873 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H 
, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) -{ - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = digest_pre[0]; - digest_t0[1] = digest_pre[1] & 0xff; - - digest_t1[0] = digest_pre[0] << 8; - digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8; - - digest_t2[0] = digest_pre[0] << 16; - digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16; - - digest_t3[0] = digest_pre[0] << 24; - digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24; - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_buf[0]; - salt_buf_t0[1] = salt_buf[1]; - salt_buf_t0[2] = salt_buf[2]; - salt_buf_t0[3] = salt_buf[3]; - - salt_buf_t1[0] 
= salt_buf[0] << 8; - salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8; - salt_buf_t1[2] = salt_buf[1] >> 24 | salt_buf[2] << 8; - salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8; - salt_buf_t1[4] = salt_buf[3] >> 24; - - salt_buf_t2[0] = salt_buf[0] << 16; - salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16; - salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16; - salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16; - salt_buf_t2[4] = salt_buf[3] >> 16; - - salt_buf_t3[0] = salt_buf[0] << 24; - salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24; - salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24; - salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; - salt_buf_t3[4] = salt_buf[3] >> 8; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - 
const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x 
digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const 
oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest 
- */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - 
make_unicode (w1, w2_t, w3_t); - - w3_t[2] = pw_len * 8 * 2; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09720_a3.cu b/nv/m09720_a3.cu deleted file mode 100644 index ae3955b..0000000 --- a/nv/m09720_a3.cu +++ /dev/null @@ -1,952 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE01_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, 
MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - 
MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void gen336 (u32x digest_pre[4], u32 salt_buf[4], u32x digest[4]) -{ - u32x digest_t0[2]; - u32x digest_t1[2]; - u32x digest_t2[2]; - u32x digest_t3[2]; - - digest_t0[0] = digest_pre[0]; - digest_t0[1] = digest_pre[1] & 0xff; - - digest_t1[0] = digest_pre[0] << 8; - digest_t1[1] = digest_pre[0] >> 24 | digest_pre[1] << 8; - - digest_t2[0] = digest_pre[0] << 16; - digest_t2[1] = digest_pre[0] >> 16 | digest_pre[1] << 16; - - digest_t3[0] = digest_pre[0] << 24; - digest_t3[1] = digest_pre[0] >> 8 | digest_pre[1] << 24; - - u32 salt_buf_t0[4]; - u32 salt_buf_t1[5]; - u32 
salt_buf_t2[5]; - u32 salt_buf_t3[5]; - - salt_buf_t0[0] = salt_buf[0]; - salt_buf_t0[1] = salt_buf[1]; - salt_buf_t0[2] = salt_buf[2]; - salt_buf_t0[3] = salt_buf[3]; - - salt_buf_t1[0] = salt_buf[0] << 8; - salt_buf_t1[1] = salt_buf[0] >> 24 | salt_buf[1] << 8; - salt_buf_t1[2] = salt_buf[1] >> 24 | salt_buf[2] << 8; - salt_buf_t1[3] = salt_buf[2] >> 24 | salt_buf[3] << 8; - salt_buf_t1[4] = salt_buf[3] >> 24; - - salt_buf_t2[0] = salt_buf[0] << 16; - salt_buf_t2[1] = salt_buf[0] >> 16 | salt_buf[1] << 16; - salt_buf_t2[2] = salt_buf[1] >> 16 | salt_buf[2] << 16; - salt_buf_t2[3] = salt_buf[2] >> 16 | salt_buf[3] << 16; - salt_buf_t2[4] = salt_buf[3] >> 16; - - salt_buf_t3[0] = salt_buf[0] << 24; - salt_buf_t3[1] = salt_buf[0] >> 8 | salt_buf[1] << 24; - salt_buf_t3[2] = salt_buf[1] >> 8 | salt_buf[2] << 24; - salt_buf_t3[3] = salt_buf[2] >> 8 | salt_buf[3] << 24; - salt_buf_t3[4] = salt_buf[3] >> 8; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // generate the 16 * 21 buffer - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..5 - w0_t[0] = digest_t0[0]; - w0_t[1] = digest_t0[1]; - - // 5..21 - w0_t[1] |= salt_buf_t1[0]; - w0_t[2] = salt_buf_t1[1]; - w0_t[3] = salt_buf_t1[2]; - w1_t[0] = salt_buf_t1[3]; - w1_t[1] = salt_buf_t1[4]; - - // 21..26 - w1_t[1] |= digest_t1[0]; - w1_t[2] = digest_t1[1]; - - // 26..42 - w1_t[2] |= salt_buf_t2[0]; - w1_t[3] = salt_buf_t2[1]; - w2_t[0] = salt_buf_t2[2]; - w2_t[1] = salt_buf_t2[3]; - w2_t[2] = salt_buf_t2[4]; - - // 42..47 - w2_t[2] |= digest_t2[0]; - w2_t[3] = digest_t2[1]; - - // 47..63 - w2_t[3] |= salt_buf_t3[0]; - w3_t[0] = salt_buf_t3[1]; - w3_t[1] = salt_buf_t3[2]; - w3_t[2] = salt_buf_t3[3]; - w3_t[3] = salt_buf_t3[4]; - - // 63.. 
- - w3_t[3] |= digest_t3[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..4 - w0_t[0] = digest_t3[1]; - - // 4..20 - w0_t[1] = salt_buf_t0[0]; - w0_t[2] = salt_buf_t0[1]; - w0_t[3] = salt_buf_t0[2]; - w1_t[0] = salt_buf_t0[3]; - - // 20..25 - w1_t[1] = digest_t0[0]; - w1_t[2] = digest_t0[1]; - - // 25..41 - w1_t[2] |= salt_buf_t1[0]; - w1_t[3] = salt_buf_t1[1]; - w2_t[0] = salt_buf_t1[2]; - w2_t[1] = salt_buf_t1[3]; - w2_t[2] = salt_buf_t1[4]; - - // 41..46 - w2_t[2] |= digest_t1[0]; - w2_t[3] = digest_t1[1]; - - // 46..62 - w2_t[3] |= salt_buf_t2[0]; - w3_t[0] = salt_buf_t2[1]; - w3_t[1] = salt_buf_t2[2]; - w3_t[2] = salt_buf_t2[3]; - w3_t[3] = salt_buf_t2[4]; - - // 62.. - w3_t[3] |= digest_t2[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..3 - w0_t[0] = digest_t2[1]; - - // 3..19 - w0_t[0] |= salt_buf_t3[0]; - w0_t[1] = salt_buf_t3[1]; - w0_t[2] = salt_buf_t3[2]; - w0_t[3] = salt_buf_t3[3]; - w1_t[0] = salt_buf_t3[4]; - - // 19..24 - w1_t[0] |= digest_t3[0]; - w1_t[1] = digest_t3[1]; - - // 24..40 - w1_t[2] = salt_buf_t0[0]; - w1_t[3] = salt_buf_t0[1]; - w2_t[0] = salt_buf_t0[2]; - w2_t[1] = salt_buf_t0[3]; - - // 40..45 - w2_t[2] = digest_t0[0]; - w2_t[3] = digest_t0[1]; - - // 45..61 - w2_t[3] |= salt_buf_t1[0]; - w3_t[0] = salt_buf_t1[1]; - w3_t[1] = salt_buf_t1[2]; - w3_t[2] = salt_buf_t1[3]; - w3_t[3] = salt_buf_t1[4]; - - // 61.. 
- w3_t[3] |= digest_t1[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..2 - w0_t[0] = digest_t1[1]; - - // 2..18 - w0_t[0] |= salt_buf_t2[0]; - w0_t[1] = salt_buf_t2[1]; - w0_t[2] = salt_buf_t2[2]; - w0_t[3] = salt_buf_t2[3]; - w1_t[0] = salt_buf_t2[4]; - - // 18..23 - w1_t[0] |= digest_t2[0]; - w1_t[1] = digest_t2[1]; - - // 23..39 - w1_t[1] |= salt_buf_t3[0]; - w1_t[2] = salt_buf_t3[1]; - w1_t[3] = salt_buf_t3[2]; - w2_t[0] = salt_buf_t3[3]; - w2_t[1] = salt_buf_t3[4]; - - // 39..44 - w2_t[1] |= digest_t3[0]; - w2_t[2] = digest_t3[1]; - - // 44..60 - w2_t[3] = salt_buf_t0[0]; - w3_t[0] = salt_buf_t0[1]; - w3_t[1] = salt_buf_t0[2]; - w3_t[2] = salt_buf_t0[3]; - - // 60.. - w3_t[3] = digest_t0[0]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = 0; - w0_t[1] = 0; - w0_t[2] = 0; - w0_t[3] = 0; - w1_t[0] = 0; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - // 0..1 - w0_t[0] = digest_t0[1]; - - // 1..17 - w0_t[0] |= salt_buf_t1[0]; - w0_t[1] = salt_buf_t1[1]; - w0_t[2] = salt_buf_t1[2]; - w0_t[3] = salt_buf_t1[3]; - w1_t[0] = salt_buf_t1[4]; - - // 17..22 - w1_t[0] |= digest_t1[0]; - w1_t[1] = digest_t1[1]; - - // 22..38 - w1_t[1] |= salt_buf_t2[0]; - w1_t[2] = salt_buf_t2[1]; - w1_t[3] = salt_buf_t2[2]; - w2_t[0] = salt_buf_t2[3]; - w2_t[1] = salt_buf_t2[4]; - - // 38..43 - w2_t[1] |= digest_t2[0]; - w2_t[2] = digest_t2[1]; - - // 43..59 - w2_t[2] |= salt_buf_t3[0]; - w2_t[3] = salt_buf_t3[1]; - w3_t[0] = salt_buf_t3[2]; - w3_t[1] = salt_buf_t3[3]; - w3_t[2] = salt_buf_t3[4]; - - // 59.. 
- w3_t[2] |= digest_t3[0]; - w3_t[3] = digest_t3[1]; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = salt_buf_t0[0]; - w0_t[1] = salt_buf_t0[1]; - w0_t[2] = salt_buf_t0[2]; - w0_t[3] = salt_buf_t0[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 21 * 16 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09720m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - 
w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = pw_len * 8; - w3_t[3] = 0; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09720s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - 
digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0[0]; - w0_t[1] = w0[1]; - w0_t[2] = w0[2]; - w0_t[3] = w0[3]; - w1_t[0] = w1[0]; - w1_t[1] = w1[1]; - w1_t[2] = w1[2]; - w1_t[3] = w1[3]; - w2_t[0] = w2[0]; - w2_t[1] = w2[1]; - w2_t[2] = w2[2]; - w2_t[3] = w2[3]; - w3_t[0] = w3[0]; - w3_t[1] = w3[1]; - w3_t[2] = pw_len * 8; - w3_t[3] = 0; - - u32x digest_pre[4]; - - digest_pre[0] = MD5M_A; - digest_pre[1] = MD5M_B; - digest_pre[2] = MD5M_C; - digest_pre[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest_pre); - - digest_pre[0] &= 0xffffffff; - digest_pre[1] &= 0x000000ff; - digest_pre[2] &= 0x00000000; - digest_pre[3] &= 0x00000000; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - gen336 (digest_pre, salt_buf, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * 
base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= 
gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - 
w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09720_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice01_t *oldoffice01_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = 
pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09720s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice01_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m09800_a0.cu b/nv/m09800_a0.cu deleted file mode 100644 index 43d791b..0000000 --- a/nv/m09800_a0.cu +++ /dev/null @@ -1,769 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - 
u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= 
rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 
1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); 
SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ 
we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 
*bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; 
- - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = (out_len * 2) + salt_len; - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = swap_workaround (digest[1]); - 
key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - 
-extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] 
= pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = (out_len * 2) + salt_len; - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, 
salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = swap_workaround (digest[1]); - key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - 
digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09800_a1.cu b/nv/m09800_a1.cu deleted file mode 100644 index cd60751..0000000 --- a/nv/m09800_a1.cu +++ /dev/null @@ -1,875 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - 
v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = 
w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, 
w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, 
we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, 
C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = 
c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = 
SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = swap_workaround (digest[1]); - key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const 
salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = 
oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - 
w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = swap_workaround (digest[1]); - key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = 
swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff 
--git a/nv/m09800_a3.cu b/nv/m09800_a3.cu deleted file mode 100644 index 0bf3a87..0000000 --- a/nv/m09800_a3.cu +++ /dev/null @@ -1,928 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, 
j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - 
#define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = 
rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = 
rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = 
rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09800m (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * salt - */ - - 
u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = w0[0]; - w1_t[1] = w0[1]; - w1_t[2] = w0[2]; - w1_t[3] = w0[3]; - w2_t[0] = w1[0]; - w2_t[1] = w1[1]; - w2_t[2] = w1[2]; - w2_t[3] = w1[3]; - w3_t[0] = w2[0]; - w3_t[1] = w2[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = 
swap_workaround (digest[1]); - key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09800s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, 
const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = w0[0]; - w1_t[1] = w0[1]; - w1_t[2] = w0[2]; - w1_t[3] = w0[3]; - w2_t[0] = w1[0]; - w2_t[1] = w1[1]; - w2_t[2] = w1[2]; - w2_t[3] = w1[3]; - w3_t[0] = w2[0]; - w3_t[1] = w2[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - 
w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x key[4]; - - key[0] = swap_workaround (digest[0]); - key[1] = swap_workaround (digest[1]); - key[2] = swap_workaround (digest[2]); - key[3] = swap_workaround (digest[3]); - - if (version == 3) - { - key[1] &= 0xff; - key[2] = 0; - key[3] = 0; - } - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, 
const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= 
gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09800s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m09810_a0.cu b/nv/m09810_a0.cu deleted file mode 100644 index 2313691..0000000 --- a/nv/m09810_a0.cu +++ /dev/null @@ -1,611 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 
4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ 
w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ 
w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ 
wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ 
wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = 
oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, 
const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x 
key[4]; - - key[0] = w0[0]; - key[1] = w0[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
-{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09810_a1.cu b/nv/m09810_a1.cu deleted file mode 100644 index b1f40b2..0000000 --- a/nv/m09810_a1.cu +++ /dev/null @@ -1,657 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; 
- rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - 
- idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, 
A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, 
w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ 
wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void 
__launch_bounds__ (64, 1) m09810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = 
oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - u32x key[4]; - - key[0] = wordl0[0] | wordr0[0]; - key[1] = (wordl0[1] | wordr0[1]) & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - 
const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = 
oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - u32x key[4]; - - key[0] = wordl0[0] | wordr0[0]; - key[1] = (wordl0[1] | wordr0[1]) & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const 
u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09810_a3.cu b/nv/m09810_a3.cu deleted file mode 100644 index d577962..0000000 --- a/nv/m09810_a3.cu +++ /dev/null @@ -1,784 +0,0 @@ -/** - * Author......: Jens 
Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u32 i, const u32 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 
0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j & 0xff); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j & 0xff); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP 
(SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, 
we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, 
w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, 
w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09810m (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - 
encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09810s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * esalt - */ - - const u32 version = oldoffice34_bufs[salt_pos].version; - - u32 encryptedVerifier[4]; - - encryptedVerifier[0] = oldoffice34_bufs[salt_pos].encryptedVerifier[0]; - encryptedVerifier[1] = oldoffice34_bufs[salt_pos].encryptedVerifier[1]; - encryptedVerifier[2] = oldoffice34_bufs[salt_pos].encryptedVerifier[2]; - encryptedVerifier[3] = oldoffice34_bufs[salt_pos].encryptedVerifier[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (out[0]); - w0_t[1] = swap_workaround (out[1]); - w0_t[2] = swap_workaround (out[2]); - w0_t[3] = swap_workaround (out[3]); - w1_t[0] = 0x80000000; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 16 * 
8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - - rc4_next_16 (rc4_key, 16, j, digest, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, 
combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, 
bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m09810_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void 
__launch_bounds__ (64, 1) m09810_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m09810s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m09820_a0.cu b/nv/m09820_a0.cu deleted file mode 
100644 index fc54f7c5..0000000 --- a/nv/m09820_a0.cu +++ /dev/null @@ -1,519 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, 
E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - 
w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, 
w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); 
- wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = 
salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = (out_len * 2) + salt_len; - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = 
SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < 
rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = (out_len * 2) + salt_len; - - append_0x80_2 (w0, w1, out_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = 
swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09820_a1.cu b/nv/m09820_a1.cu deleted file mode 100644 index 6d9ff52..0000000 --- a/nv/m09820_a1.cu +++ /dev/null @@ -1,625 +0,0 @@ 
-/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, 
A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ 
w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ 
wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ 
wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, 
wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; 
- w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, 
const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - 
const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - const u32 pw_salt_len = (pw_len * 2) + salt_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - 
- if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - append_0x80_2 (w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - make_unicode (w0, w0_t, w1_t); - make_unicode (w1, w2_t, w3_t); - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, salt_len); - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = swap_workaround (w1_t[0]); - w1_t[1] = swap_workaround (w1_t[1]); - w1_t[2] = swap_workaround (w1_t[2]); - w1_t[3] = swap_workaround (w1_t[3]); - w2_t[0] = swap_workaround (w2_t[0]); - w2_t[1] = swap_workaround (w2_t[1]); - w2_t[2] = swap_workaround (w2_t[2]); - w2_t[3] = swap_workaround (w2_t[3]); - w3_t[0] = swap_workaround (w3_t[0]); - w3_t[1] = swap_workaround (w3_t[1]); - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - 
digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, 
const u32 gid_max) -{ -} diff --git a/nv/m09820_a3.cu b/nv/m09820_a3.cu deleted file mode 100644 index 49d8dec..0000000 --- a/nv/m09820_a3.cu +++ /dev/null @@ -1,646 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _OLDOFFICE34_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, 
D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - 
w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, 
w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); 
- wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m09820m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = 
salt_buf[3]; - w1_t[0] = w0[0]; - w1_t[1] = w0[1]; - w1_t[2] = w0[2]; - w1_t[3] = w0[3]; - w2_t[0] = w1[0]; - w2_t[1] = w1[1]; - w2_t[2] = w1[2]; - w2_t[3] = w1[3]; - w3_t[0] = w2[0]; - w3_t[1] = w2[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09820s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = 
(blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - const u32 salt_len = 16; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf[0]; - w0_t[1] = salt_buf[1]; - w0_t[2] = salt_buf[2]; - w0_t[3] = salt_buf[3]; - w1_t[0] = w0[0]; - w1_t[1] = w0[1]; - w1_t[2] = w0[2]; - w1_t[3] = w0[3]; - w2_t[0] = w1[0]; - w2_t[1] = w1[1]; - w2_t[2] = w1[2]; - w2_t[3] = w1[3]; - w3_t[0] = w2[0]; - w3_t[1] = w2[1]; - w3_t[2] = 0; - w3_t[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = digest[4]; - w1_t[1] = 0; - w1_t[2] = 0x80000000; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = (20 + 4) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = swap_workaround (digest[0]); - u32x b = swap_workaround (digest[1]) & 0xff; - - const u32x r0 = a; - const u32x 
r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x 
*bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const 
u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09820_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *bfs_buf, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const oldoffice34_t *oldoffice34_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 
gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - m09820s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, oldoffice34_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m09900_a0.cu b/nv/m09900_a0.cu deleted file mode 100644 index 649a957..0000000 --- a/nv/m09900_a0.cu +++ /dev/null @@ -1,586 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - 
MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, 
w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] = 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; 
- - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, 
t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, 
w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] = 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, 
MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, 
a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09900_a1.cu b/nv/m09900_a1.cu deleted file mode 100644 index b25abf5..0000000 --- a/nv/m09900_a1.cu +++ /dev/null @@ -1,692 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = 
c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] = 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, t3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 
0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - 
u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2[0], MD5C21, MD5S21); - MD5_STEP 
(MD5_H , c, d, a, b, w2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] 
= 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP 
(MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s08 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m09900_a3.cu b/nv/m09900_a3.cu deleted file mode 100644 index 3ed2b39..0000000 --- a/nv/m09900_a3.cu +++ /dev/null @@ -1,851 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define 
DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m09900m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 
F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * loop - */ - - const u32 
bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H 
, c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] = 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], 
MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t3[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H 
, d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m09900s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, 
const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 F_w0c00 = 0 + MD5C00; - const u32 F_w1c01 = w[ 1] + MD5C01; - const u32 F_w2c02 = w[ 2] + MD5C02; - const u32 F_w3c03 = w[ 3] + MD5C03; - const u32 F_w4c04 = w[ 4] + MD5C04; - const u32 F_w5c05 = w[ 5] + MD5C05; - const u32 F_w6c06 = w[ 6] + MD5C06; - const u32 F_w7c07 = w[ 7] + MD5C07; - const u32 F_w8c08 = w[ 8] + MD5C08; - const u32 F_w9c09 = w[ 9] + MD5C09; - const u32 F_wac0a = w[10] + MD5C0a; - const u32 F_wbc0b = w[11] + MD5C0b; - const u32 F_wcc0c = w[12] + MD5C0c; - const u32 F_wdc0d = w[13] + MD5C0d; - const u32 F_wec0e = w[14] + MD5C0e; - const u32 F_wfc0f = w[15] + MD5C0f; - - const u32 G_w1c10 = w[ 1] + MD5C10; - const u32 G_w6c11 = w[ 6] + MD5C11; - const u32 G_wbc12 = w[11] + MD5C12; - const u32 G_w0c13 = 0 + MD5C13; - const u32 G_w5c14 = w[ 5] + MD5C14; - const u32 G_wac15 = w[10] + MD5C15; - const u32 G_wfc16 = w[15] + MD5C16; - const u32 G_w4c17 = w[ 4] + MD5C17; - const u32 G_w9c18 = w[ 9] + MD5C18; - const u32 G_wec19 = w[14] + MD5C19; - const u32 G_w3c1a = w[ 3] + MD5C1a; - const u32 G_w8c1b = w[ 8] + MD5C1b; - const u32 G_wdc1c = w[13] + MD5C1c; - const u32 G_w2c1d = w[ 2] + MD5C1d; - const u32 G_w7c1e = w[ 7] + MD5C1e; - const u32 G_wcc1f = w[12] + MD5C1f; - - const u32 H_w5c20 = w[ 5] + MD5C20; - const u32 H_w8c21 = w[ 8] + MD5C21; - const u32 H_wbc22 = w[11] + MD5C22; - const u32 H_wec23 = w[14] + MD5C23; - const u32 H_w1c24 = w[ 1] + MD5C24; - const 
u32 H_w4c25 = w[ 4] + MD5C25; - const u32 H_w7c26 = w[ 7] + MD5C26; - const u32 H_wac27 = w[10] + MD5C27; - const u32 H_wdc28 = w[13] + MD5C28; - const u32 H_w0c29 = 0 + MD5C29; - const u32 H_w3c2a = w[ 3] + MD5C2a; - const u32 H_w6c2b = w[ 6] + MD5C2b; - const u32 H_w9c2c = w[ 9] + MD5C2c; - const u32 H_wcc2d = w[12] + MD5C2d; - const u32 H_wfc2e = w[15] + MD5C2e; - const u32 H_w2c2f = w[ 2] + MD5C2f; - - const u32 I_w0c30 = 0 + MD5C30; - const u32 I_w7c31 = w[ 7] + MD5C31; - const u32 I_wec32 = w[14] + MD5C32; - const u32 I_w5c33 = w[ 5] + MD5C33; - const u32 I_wcc34 = w[12] + MD5C34; - const u32 I_w3c35 = w[ 3] + MD5C35; - const u32 I_wac36 = w[10] + MD5C36; - const u32 I_w1c37 = w[ 1] + MD5C37; - const u32 I_w8c38 = w[ 8] + MD5C38; - const u32 I_wfc39 = w[15] + MD5C39; - const u32 I_w6c3a = w[ 6] + MD5C3a; - const u32 I_wdc3b = w[13] + MD5C3b; - const u32 I_w4c3c = w[ 4] + MD5C3c; - const u32 I_wbc3d = w[11] + MD5C3d; - const u32 I_w2c3e = w[ 2] + MD5C3e; - const u32 I_w9c3f = w[ 9] + MD5C3f; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0, F_w0c00, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w1c01, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w2c02, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w3c03, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_w4c04, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w5c05, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_w6c06, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_w7c07, MD5S03); - 
MD5_STEP0(MD5_Fo, a, b, c, d, F_w8c08, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_w9c09, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wac0a, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wbc0b, MD5S03); - MD5_STEP0(MD5_Fo, a, b, c, d, F_wcc0c, MD5S00); - MD5_STEP0(MD5_Fo, d, a, b, c, F_wdc0d, MD5S01); - MD5_STEP0(MD5_Fo, c, d, a, b, F_wec0e, MD5S02); - MD5_STEP0(MD5_Fo, b, c, d, a, F_wfc0f, MD5S03); - - MD5_STEP0(MD5_Go, a, b, c, d, G_w1c10, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w6c11, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wbc12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0, G_w0c13, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w5c14, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wac15, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_wfc16, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w4c17, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_w9c18, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_wec19, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w3c1a, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_w8c1b, MD5S13); - MD5_STEP0(MD5_Go, a, b, c, d, G_wdc1c, MD5S10); - MD5_STEP0(MD5_Go, d, a, b, c, G_w2c1d, MD5S11); - MD5_STEP0(MD5_Go, c, d, a, b, G_w7c1e, MD5S12); - MD5_STEP0(MD5_Go, b, c, d, a, G_wcc1f, MD5S13); - - MD5_STEP0(MD5_H , a, b, c, d, H_w5c20, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w8c21, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wbc22, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wec23, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w1c24, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_w4c25, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w7c26, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_wac27, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_wdc28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0, H_w0c29, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_w3c2a, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, H_w6c2b, MD5S23); - MD5_STEP0(MD5_H , a, b, c, d, H_w9c2c, MD5S20); - MD5_STEP0(MD5_H , d, a, b, c, H_wcc2d, MD5S21); - MD5_STEP0(MD5_H , c, d, a, b, H_wfc2e, MD5S22); - MD5_STEP0(MD5_H , b, c, d, a, 
H_w2c2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0, I_w0c30, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w7c31, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wec32, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w5c33, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_wcc34, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_w3c35, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_wac36, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w1c37, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w8c38, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wfc39, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w6c3a, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_wdc3b, MD5S33); - MD5_STEP0(MD5_I , a, b, c, d, I_w4c3c, MD5S30); - MD5_STEP0(MD5_I , d, a, b, c, I_wbc3d, MD5S31); - MD5_STEP0(MD5_I , c, d, a, b, I_w2c3e, MD5S32); - MD5_STEP0(MD5_I , b, c, d, a, I_w9c3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - u32x t0[4]; - u32x t1[4]; - u32x t2[4]; - u32x t3[4]; - - t0[0] = 0; - t0[1] = 0; - t0[2] = 0; - t0[3] = 0; - t1[0] = 0; - t1[1] = 0; - t1[2] = 0; - t1[3] = 0; - t2[0] = 0; - t2[1] = 0x80; - t2[2] = 0; - t2[3] = 0; - t3[0] = 0; - t3[1] = 0; - t3[2] = 100 * 8; - t3[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, t0[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t0[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t0[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t0[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t1[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t1[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t1[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t1[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t2[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t2[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t2[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t2[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, t3[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, t3[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, t3[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, t3[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, t0[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t1[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t2[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t0[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t1[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t2[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t3[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t1[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t2[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t3[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t0[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t2[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, t3[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, t0[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, t1[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, t3[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, t1[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t2[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t2[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t3[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t0[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t1[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t1[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t2[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t3[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t0[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t0[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t1[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, t2[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, t3[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, t3[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, t0[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, t0[0], MD5C30, MD5S30); - MD5_STEP 
(MD5_I , d, a, b, c, t1[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t3[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t1[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t3[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t0[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t2[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t0[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t2[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, t3[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t1[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t3[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, t1[0], MD5C3c, MD5S30); - - if ((a + r_a) != search[0]) continue; - - MD5_STEP (MD5_I , d, a, b, c, t2[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, t0[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, t2[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - 
w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; 
- w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900m (w, pw_len, pws, rules_buf, 
combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, 
bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m09900_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m09900s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m10100_a0.cu b/nv/m10100_a0.cu deleted file mode 100644 index f10a993..0000000 --- a/nv/m10100_a0.cu +++ /dev/null @@ -1,325 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SIPHASH_ - -#include "include/constants.h" 
-#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= (v2); \ - (v2) = rotl64 ((v2), 32); - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - 
*/ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] = pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32x out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[4], pw_len); - - u64 *w_ptr = (u64 *) w; - - w_ptr[out_len / 8] |= (u64) out_len << 56; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - int i; - int j; - - for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) - { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = 
h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] 
= pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32x out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[4], pw_len); - - u64 *w_ptr = (u64 *) w; - - w_ptr[out_len / 8] |= (u64) out_len << 56; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - int i; - int j; - - for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) - { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, 
const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10100_a1.cu b/nv/m10100_a1.cu deleted file mode 100644 index 77209c2..0000000 --- a/nv/m10100_a1.cu +++ /dev/null @@ -1,431 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SIPHASH_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= 
(v2); \ - (v2) = rotl64 ((v2), 32); - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], 
salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u64 *w_ptr = (u64 *) w; - - w_ptr[pw_len / 8] |= (u64) pw_len << 56; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - int i; - int j; - - for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) - { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - 
SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ 
(256, 1) m10100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= 
hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[2] | wordr3[2]; - w[15] = wordl3[3] | wordr3[3]; - - u64 *w_ptr = (u64 *) w; - - w_ptr[pw_len / 8] |= (u64) pw_len << 56; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - int i; - int j; - - for (i = 0, j = 0; i <= pw_len; i += 8, j += 2) - { - u64x m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, 
v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 
digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10100_a3.cu b/nv/m10100_a3.cu deleted file mode 100644 index e06295f..0000000 --- a/nv/m10100_a3.cu +++ /dev/null @@ -1,482 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SIPHASH_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -#define SIPROUND(v0,v1,v2,v3) \ - (v0) += (v1); \ - (v1) = rotl64 ((v1), 13); \ - (v1) ^= (v0); \ - (v0) = rotl64 ((v0), 32); \ - (v2) += (v3); \ - (v3) = rotl64 ((v3), 16); \ - (v3) ^= (v2); \ - (v0) += (v3); \ - (v3) = rotl64 ((v3), 21); \ - (v3) ^= (v0); \ - (v2) += (v1); \ - (v1) = rotl64 ((v1), 17); \ - (v1) ^= (v2); \ - (v2) = rotl64 ((v2), 32); - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ static void m10100m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t 
*salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - u64 *w_ptr = (u64 *) w; - - w_ptr[pw_len / 8] |= (u64) pw_len << 56; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - u64x m = hl32_to_64 (w[1], w0); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - - int i; - int j; - - for (i = 8, j = 2; i <= pw_len; i += 8, j += 2) - { - m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m10100s (u32 w[16], const u32 pw_len, const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * base - */ - - u64 v0p = SIPHASHM_0; - u64 v1p = SIPHASHM_1; - u64 v2p = SIPHASHM_2; - u64 v3p = SIPHASHM_3; - - v0p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v1p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - v2p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[1], salt_bufs[salt_pos].salt_buf[0]); - v3p ^= hl32_to_64 (salt_bufs[salt_pos].salt_buf[3], salt_bufs[salt_pos].salt_buf[2]); - - u64 *w_ptr = (u64 *) w; - - w_ptr[pw_len / 8] |= (u64) pw_len << 56; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u64x v0 = v0p; - u64x v1 = v1p; - u64x v2 = v2p; - u64x v3 = v3p; - - u64x m = hl32_to_64 (w[1], w0); - - v3 ^= m; - - SIPROUND (v0, 
v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - - int i; - int j; - - for (i = 8, j = 2; i <= pw_len; i += 8, j += 2) - { - m = hl32_to_64 (w[j + 1], w[j + 0]); - - v3 ^= m; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - v0 ^= m; - } - - v2 ^= 0xff; - - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - SIPROUND (v0, v1, v2, v3); - - const u64x v = v0 ^ v1 ^ v2 ^ v3; - - const u32x a = l32_from_64 (v); - const u32x b = h32_from_64 (v); - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern 
"C" __global__ void __launch_bounds__ (256, 1) m10100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = pws[gid].i[14]; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r,void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10100s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m10300.cu b/nv/m10300.cu deleted file mode 100644 index b0e6f56..0000000 --- a/nv/m10300.cu +++ /dev/null @@ -1,421 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define 
VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 
((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 
((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10300_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, saph_sha1_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x word_buf0[4]; - - word_buf0[0] = pws[gid].i[0]; - word_buf0[1] = pws[gid].i[1]; - word_buf0[2] = pws[gid].i[2]; - word_buf0[3] = pws[gid].i[3]; - - u32x word_buf1[4]; - - word_buf1[0] = pws[gid].i[4]; - word_buf1[1] = pws[gid].i[5]; - word_buf1[2] = pws[gid].i[6]; - word_buf1[3] = pws[gid].i[7]; - - u32x word_buf2[2]; - - word_buf2[0] = pws[gid].i[8]; - word_buf2[1] = pws[gid].i[9]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * init - */ - - u32x w0[4]; - - w0[0] = salt_buf[0]; - w0[1] = salt_buf[1]; - w0[2] = salt_buf[2]; - w0[3] = salt_buf[3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - switch_buffer_by_offset (w0, w1, w2, w3, pw_len); - - w0[0] |= word_buf0[0]; - w0[1] |= word_buf0[1]; - w0[2] |= word_buf0[2]; - w0[3] |= word_buf0[3]; - - w1[0] |= word_buf1[0]; - w1[1] |= word_buf1[1]; - w1[2] |= word_buf1[2]; - w1[3] |= word_buf1[3]; - - w2[0] |= word_buf2[0]; - w2[1] |= word_buf2[1]; - - const u32 pw_salt_len = pw_len + salt_len; - - append_0x80_4 (w0, w1, w2, w3, pw_salt_len); - - // 
swaps needed - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = pw_salt_len * 8; - - u32x digest[5]; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10300_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, saph_sha1_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x word_buf0[4]; - - word_buf0[0] = swap_workaround (pws[gid].i[0]); - word_buf0[1] = swap_workaround (pws[gid].i[1]); - 
word_buf0[2] = swap_workaround (pws[gid].i[2]); - word_buf0[3] = swap_workaround (pws[gid].i[3]); - - u32x word_buf1[4]; - - word_buf1[0] = swap_workaround (pws[gid].i[4]); - word_buf1[1] = swap_workaround (pws[gid].i[5]); - word_buf1[2] = swap_workaround (pws[gid].i[6]); - word_buf1[3] = swap_workaround (pws[gid].i[7]); - - u32x word_buf2[2]; - - word_buf2[0] = swap_workaround (pws[gid].i[8]); - word_buf2[1] = swap_workaround (pws[gid].i[9]); - - const u32 pw_len = pws[gid].pw_len; - - u32x digest[5]; - - digest[0] = tmps[gid].digest_buf[0]; - digest[1] = tmps[gid].digest_buf[1]; - digest[2] = tmps[gid].digest_buf[2]; - digest[3] = tmps[gid].digest_buf[3]; - digest[4] = tmps[gid].digest_buf[4]; - - /** - * loop - */ - - for (u32 i = 0; i < loop_cnt; i++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - switch_buffer_by_offset_be (w0, w1, w2, w3, pw_len); - - w0[0] |= word_buf0[0]; - w0[1] |= word_buf0[1]; - w0[2] |= word_buf0[2]; - w0[3] |= word_buf0[3]; - w1[0] |= word_buf1[0]; - w1[1] |= word_buf1[1]; - w1[2] |= word_buf1[2]; - w1[3] |= word_buf1[3]; - w2[0] |= word_buf2[0]; - w2[1] |= word_buf2[1]; - - // not needed - - w3[2] = 0; - w3[3] = (pw_len + 20) * 8; - - digest[0] = SHA1M_A; - digest[1] = SHA1M_B; - digest[2] = SHA1M_C; - digest[3] = SHA1M_D; - digest[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, digest); - } - - tmps[gid].digest_buf[0] = digest[0]; - tmps[gid].digest_buf[1] = digest[1]; - tmps[gid].digest_buf[2] = digest[2]; - tmps[gid].digest_buf[3] = digest[3]; - tmps[gid].digest_buf[4] = digest[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10300_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, saph_sha1_tmp_t 
*tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].digest_buf[0]; - const u32x r1 = tmps[gid].digest_buf[1]; - const u32x r2 = tmps[gid].digest_buf[2]; - const u32x r3 = tmps[gid].digest_buf[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m10400_a0.cu b/nv/m10400_a0.cu deleted file mode 100644 index b0792ca..0000000 --- a/nv/m10400_a0.cu +++ /dev/null @@ -1,679 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, 
- 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - #pragma unroll 4 - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform 
(const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - 
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 
P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = 
pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - 
w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10400_a1.cu b/nv/m10400_a1.cu deleted file mode 100644 index b307562..0000000 --- a/nv/m10400_a1.cu +++ /dev/null @@ -1,785 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 
0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - #pragma unroll 4 - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform 
(const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, 
MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - 
MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = 
pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - 
u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, 
const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - 
- /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10400_a3.cu b/nv/m10400_a3.cu deleted file mode 100644 index 2c84a2e..0000000 --- a/nv/m10400_a3.cu +++ /dev/null @@ -1,849 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - 
u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - #pragma unroll 4 - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = 
w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP 
(MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m10400m (RC4_KEY rc4_keys[64], u32x w0[4], 
u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - 
w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m10400s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, 
const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 
32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - // now the RC4 part - - u32x key[4]; - - key[0] = digest[0]; - key[1] = digest[1] & 0xff; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x 
w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - 
w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - 
u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 
0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * 
main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10400s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m10410_a0.cu b/nv/m10410_a0.cu deleted file mode 100644 index a2b3692..0000000 --- a/nv/m10410_a0.cu +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v 
+= a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s08 
(const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10410_a1.cu b/nv/m10410_a1.cu deleted file mode 100644 index e881982..0000000 --- a/nv/m10410_a1.cu +++ /dev/null @@ -1,432 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 
-#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += 
rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if 
(combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[2]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if 
(gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * key - */ - - __shared__ RC4_KEY rc4_keys[64]; - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[2]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 
(rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10410_a3.cu b/nv/m10410_a3.cu deleted file mode 100644 index 7e0ab50..0000000 --- a/nv/m10410_a3.cu +++ /dev/null @@ -1,552 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - const u32 d0 = data[0] >> 0; - const u32 d1 = data[0] >> 8; - const u32 d2 = data[0] >> 16; - const u32 d3 = data[0] >> 24; - const u32 d4 = data[1] >> 0; - - u32 i = 0; - u32 j = 0; - - #pragma unroll 52 - for (i = 0; i < 255; i += 5) - { - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); - j += rc4_key->S[i + 1]; j += d1; swap (rc4_key, i + 1, j); - j += rc4_key->S[i + 2]; j += d2; swap (rc4_key, i + 2, j); - j += rc4_key->S[i + 3]; j += d3; swap (rc4_key, i + 3, j); - j += rc4_key->S[i + 4]; j += d4; swap (rc4_key, i + 4, j); - } - - j += rc4_key->S[i + 0]; j += d0; swap (rc4_key, i + 0, j); -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const 
u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m10410m (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 
(rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_M - } -} - -__device__ static void m10410s (RC4_KEY rc4_keys[64], u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - // now the RC4 part - - u32x key[4]; - - key[0] = w0[0]; - key[1] = w0[1]; - key[2] = 0; - key[3] = 0; - - rc4_init_16 (rc4_key, key); - - u32x out[4]; - - rc4_next_16 (rc4_key, 0, 0, padding, out); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = out[2]; - const u32x r3 = out[3]; - - #include VECT_COMPARE_S - } -} - -extern "C" 
__global__ void __launch_bounds__ (64, 1) m10410_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const 
u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410m (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const 
u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10410_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10410_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, 
const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - __shared__ RC4_KEY rc4_keys[64]; - - m10410s (rc4_keys, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m10420_a0.cu b/nv/m10420_a0.cu deleted file mode 100644 index aa2d33c..0000000 --- a/nv/m10420_a0.cu +++ /dev/null @@ -1,536 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M 
"check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - 
MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I 
, a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = 
pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = 
pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10420_a1.cu b/nv/m10420_a1.cu deleted file mode 100644 index 0fcae66..0000000 --- a/nv/m10420_a1.cu +++ /dev/null @@ -1,646 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE2 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x 
w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - 
MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - 
- u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - 
w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
-{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - 
- wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 
wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10420_a3.cu b/nv/m10420_a3.cu deleted file mode 100644 index afd8179..0000000 --- a/nv/m10420_a3.cu +++ /dev/null @@ -1,711 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE2 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - 
u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, 
MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ __constant__ bf_t 
c_bfs[1024]; - -__device__ static void m10420m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - 
w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m10420s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[4]; - - id_buf[0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[3] = pdf_bufs[salt_pos].id_buf[3]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! 
- // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[0]; - w0_t[2] = id_buf[1]; - w0_t[3] = id_buf[2]; - w1_t[0] = id_buf[3]; - w1_t[1] = 0x80; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 84 * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - u32x a = digest[0]; - u32x b = digest[1] & 0xff; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 
0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] 
= 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; 
- w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, 
bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10420_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x * words_buf_r, void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10420s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, pdf_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, 
digests_offset); -} diff --git a/nv/m10500.cu b/nv/m10500.cu deleted file mode 100644 index c7453dd..0000000 --- a/nv/m10500.cu +++ /dev/null @@ -1,574 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 padding[8] = -{ - 0x5e4ebf28, - 0x418a754e, - 0x564e0064, - 0x0801faff, - 0xb6002e2e, - 0x803e68d0, - 0xfea90c2f, - 0x7a695364 -}; - -typedef struct -{ - u8 S[256]; - - u32 wtf_its_faster; - -} RC4_KEY; - -__device__ static void swap (RC4_KEY *rc4_key, const u8 i, const u8 j) -{ - u8 tmp; - - tmp = rc4_key->S[i]; - rc4_key->S[i] = rc4_key->S[j]; - rc4_key->S[j] = tmp; -} - -__device__ static void rc4_init_16 (RC4_KEY *rc4_key, const u32 data[4]) -{ - u32 v = 0x03020100; - u32 a = 0x04040404; - - u32 *ptr = (u32 *) rc4_key->S; - - #pragma unroll 64 - for (u32 i = 0; i < 64; i++) - { - *ptr++ = v; v += a; - } - - u32 j = 0; - - for (u32 i = 0; i < 16; i++) - { - u32 idx = i * 16; - - u32 v; - - v = data[0]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[1]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap 
(rc4_key, idx, j); idx++; - - v = data[2]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - - v = data[3]; - - j += rc4_key->S[idx] + (v >> 0); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 8); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 16); swap (rc4_key, idx, j); idx++; - j += rc4_key->S[idx] + (v >> 24); swap (rc4_key, idx, j); idx++; - } -} - -__device__ static u8 rc4_next_16 (RC4_KEY *rc4_key, u8 i, u8 j, const u32 in[4], u32 out[4]) -{ - for (u32 k = 0; k < 4; k++) - { - u32 xor4 = 0; - - u8 idx; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 0; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 8; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 16; - - i += 1; - j += rc4_key->S[i]; - - swap (rc4_key, i, j); - - idx = rc4_key->S[i] + rc4_key->S[j]; - - xor4 |= rc4_key->S[idx] << 24; - - out[k] = in[k] ^ xor4; - } - - return j; -} - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP 
(MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, 
b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf14_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = 0; - w2[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * U_buf - */ - - u32 o_buf[8]; - - o_buf[0] = pdf_bufs[salt_pos].o_buf[0]; - o_buf[1] = pdf_bufs[salt_pos].o_buf[1]; - o_buf[2] = pdf_bufs[salt_pos].o_buf[2]; - o_buf[3] = pdf_bufs[salt_pos].o_buf[3]; - o_buf[4] = pdf_bufs[salt_pos].o_buf[4]; - o_buf[5] = pdf_bufs[salt_pos].o_buf[5]; - o_buf[6] = pdf_bufs[salt_pos].o_buf[6]; - o_buf[7] = pdf_bufs[salt_pos].o_buf[7]; - - u32 P = pdf_bufs[salt_pos].P; - - u32 id_buf[12]; - - id_buf[ 0] = pdf_bufs[salt_pos].id_buf[0]; - id_buf[ 1] = pdf_bufs[salt_pos].id_buf[1]; - id_buf[ 2] = pdf_bufs[salt_pos].id_buf[2]; - id_buf[ 3] = pdf_bufs[salt_pos].id_buf[3]; - - id_buf[ 4] = pdf_bufs[salt_pos].id_buf[4]; - id_buf[ 5] = pdf_bufs[salt_pos].id_buf[5]; - id_buf[ 6] = pdf_bufs[salt_pos].id_buf[6]; - id_buf[ 7] = pdf_bufs[salt_pos].id_buf[7]; - - id_buf[ 8] = 0; - id_buf[ 9] = 0; - id_buf[10] = 0; - id_buf[11] = 0; - - u32 id_len = pdf_bufs[salt_pos].id_len; - u32 id_len4 = id_len / 4; - - u32 rc4data[2]; - - rc4data[0] = pdf_bufs[salt_pos].rc4data[0]; - rc4data[1] = pdf_bufs[salt_pos].rc4data[1]; - - u32 final_length = 68 + id_len; - - u32x w11 = 0x80; - u32x w12 = 0; - - if 
(pdf_bufs[salt_pos].enc_md != 1) - { - w11 = 0xffffffff; - w12 = 0x80; - - final_length += 4; - } - - id_buf[id_len4 + 0] = w11; - id_buf[id_len4 + 1] = w12; - - /** - * main init - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - // max length supported by pdf11 is 32 - - w0_t[0] = padding[0]; - w0_t[1] = padding[1]; - w0_t[2] = padding[2]; - w0_t[3] = padding[3]; - w1_t[0] = padding[4]; - w1_t[1] = padding[5]; - w1_t[2] = padding[6]; - w1_t[3] = padding[7]; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - // add password - // truncate at 32 is wanted, not a bug! - // add o_buf - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - w2_t[0] = o_buf[0]; - w2_t[1] = o_buf[1]; - w2_t[2] = o_buf[2]; - w2_t[3] = o_buf[3]; - w3_t[0] = o_buf[4]; - w3_t[1] = o_buf[5]; - w3_t[2] = o_buf[6]; - w3_t[3] = o_buf[7]; - - u32x digest[4]; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - w0_t[0] = P; - w0_t[1] = id_buf[ 0]; - w0_t[2] = id_buf[ 1]; - w0_t[3] = id_buf[ 2]; - w1_t[0] = id_buf[ 3]; - w1_t[1] = id_buf[ 4]; - w1_t[2] = id_buf[ 5]; - w1_t[3] = id_buf[ 6]; - w2_t[0] = id_buf[ 7]; - w2_t[1] = id_buf[ 8]; - w2_t[2] = id_buf[ 9]; - w2_t[3] = id_buf[10]; - w3_t[0] = id_buf[11]; - w3_t[1] = 0; - w3_t[2] = final_length * 8; - w3_t[3] = 0; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - - tmps[gid].digest[0] = digest[0]; - tmps[gid].digest[1] = digest[1]; - tmps[gid].digest[2] = digest[2]; - tmps[gid].digest[3] = digest[3]; - - tmps[gid].out[0] = rc4data[0]; - tmps[gid].out[1] = rc4data[1]; - tmps[gid].out[2] = 0; - tmps[gid].out[3] = 0; -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10500_loop (const 
pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf14_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - if (gid >= gid_max) return; - - /** - * shared - */ - - __shared__ RC4_KEY rc4_keys[64]; - - RC4_KEY *rc4_key = &rc4_keys[lid]; - - /** - * loop - */ - - u32x digest[4]; - - digest[0] = tmps[gid].digest[0]; - digest[1] = tmps[gid].digest[1]; - digest[2] = tmps[gid].digest[2]; - digest[3] = tmps[gid].digest[3]; - - u32x out[4]; - - out[0] = tmps[gid].out[0]; - out[1] = tmps[gid].out[1]; - out[2] = tmps[gid].out[2]; - out[3] = tmps[gid].out[3]; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - if (j < 50) - { - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = digest[0]; - w0_t[1] = digest[1]; - w0_t[2] = digest[2]; - w0_t[3] = digest[3]; - w1_t[0] = 0x80; - w1_t[1] = 0; - w1_t[2] = 0; - w1_t[3] = 0; - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 16 * 8; - w3_t[3] = 0; - - digest[0] = MD5M_A; - digest[1] = MD5M_B; - digest[2] = MD5M_C; - digest[3] = MD5M_D; - - md5_transform (w0_t, w1_t, w2_t, w3_t, digest); - } - else - { - const u32 x = j - 50; - - const u32 xv = x << 0 - | x << 8 - | x << 16 - | x << 24; - - u32x 
tmp[4]; - - tmp[0] = digest[0] ^ xv; - tmp[1] = digest[1] ^ xv; - tmp[2] = digest[2] ^ xv; - tmp[3] = digest[3] ^ xv; - - rc4_init_16 (rc4_key, tmp); - - rc4_next_16 (rc4_key, 0, 0, out, out); - } - } - - tmps[gid].digest[0] = digest[0]; - tmps[gid].digest[1] = digest[1]; - tmps[gid].digest[2] = digest[2]; - tmps[gid].digest[3] = digest[3]; - - tmps[gid].out[0] = out[0]; - tmps[gid].out[1] = out[1]; - tmps[gid].out[2] = out[2]; - tmps[gid].out[3] = out[3]; -} - -extern "C" __global__ void __launch_bounds__ (64, 1) m10500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf14_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = tmps[gid].out[0]; - const u32x r1 = tmps[gid].out[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m10700.cu b/nv/m10700.cu deleted file mode 100644 index e1a89c6..0000000 --- a/nv/m10700.cu +++ /dev/null @@ -1,1720 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PDF17L8_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define 
VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -typedef struct -{ - union - { - u32 dgst32[16]; - u64 dgst64[8]; - }; - - u32 dgst_len; - - union - { - u32 W32[32]; - u64 W64[16]; - }; - - u32 W_len; - -} ctx_t; - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround 
(w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = swap_workaround (w3[2]); - u32x wf_t = swap_workaround (w3[3]); - - #define ROUND256_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND256_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, 
g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND256_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND256_EXPAND (); ROUND256_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ u64 k_sha384[80] = -{ - SHA384C00, SHA384C01, SHA384C02, SHA384C03, - SHA384C04, SHA384C05, SHA384C06, SHA384C07, - SHA384C08, SHA384C09, SHA384C0a, SHA384C0b, - SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f, - SHA384C10, SHA384C11, SHA384C12, SHA384C13, - SHA384C14, SHA384C15, SHA384C16, SHA384C17, - SHA384C18, SHA384C19, SHA384C1a, SHA384C1b, - SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f, - SHA384C20, SHA384C21, SHA384C22, SHA384C23, - SHA384C24, SHA384C25, SHA384C26, SHA384C27, - SHA384C28, SHA384C29, SHA384C2a, SHA384C2b, - SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f, - SHA384C30, SHA384C31, SHA384C32, SHA384C33, - SHA384C34, SHA384C35, SHA384C36, SHA384C37, - SHA384C38, SHA384C39, SHA384C3a, SHA384C3b, - SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f, - SHA384C40, SHA384C41, SHA384C42, SHA384C43, - SHA384C44, SHA384C45, SHA384C46, SHA384C47, - SHA384C48, SHA384C49, SHA384C4a, SHA384C4b, - SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, -}; - -__device__ static void sha384_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64x digest[8]) -{ - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = 
digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = swap_workaround (w0[0]); - u64x w1_t = swap_workaround (w0[1]); - u64x w2_t = swap_workaround (w0[2]); - u64x w3_t = swap_workaround (w0[3]); - u64x w4_t = swap_workaround (w1[0]); - u64x w5_t = swap_workaround (w1[1]); - u64x w6_t = swap_workaround (w1[2]); - u64x w7_t = swap_workaround (w1[3]); - u64x w8_t = swap_workaround (w2[0]); - u64x w9_t = swap_workaround (w2[1]); - u64x wa_t = swap_workaround (w2[2]); - u64x wb_t = swap_workaround (w2[3]); - u64x wc_t = swap_workaround (w3[0]); - u64x wd_t = swap_workaround (w3[1]); - u64x we_t = swap_workaround (w3[2]); - u64x wf_t = swap_workaround (w3[3]); - - #define ROUND384_EXPAND() \ - { \ - w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND384_STEP(i) \ - { \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ - 
SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ - } - - ROUND384_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND384_EXPAND (); ROUND384_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ u64 k_sha512[80] = -{ - SHA384C00, SHA384C01, SHA384C02, SHA384C03, - SHA384C04, SHA384C05, SHA384C06, SHA384C07, - SHA384C08, SHA384C09, SHA384C0a, SHA384C0b, - SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f, - SHA384C10, SHA384C11, SHA384C12, SHA384C13, - SHA384C14, SHA384C15, SHA384C16, SHA384C17, - SHA384C18, SHA384C19, SHA384C1a, SHA384C1b, - SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f, - SHA384C20, SHA384C21, SHA384C22, SHA384C23, - SHA384C24, SHA384C25, SHA384C26, SHA384C27, - SHA384C28, SHA384C29, SHA384C2a, SHA384C2b, - SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f, - SHA384C30, SHA384C31, SHA384C32, SHA384C33, - SHA384C34, 
SHA384C35, SHA384C36, SHA384C37, - SHA384C38, SHA384C39, SHA384C3a, SHA384C3b, - SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f, - SHA384C40, SHA384C41, SHA384C42, SHA384C43, - SHA384C44, SHA384C45, SHA384C46, SHA384C47, - SHA384C48, SHA384C49, SHA384C4a, SHA384C4b, - SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, -}; - -__device__ static void sha512_transform (const u64 w0[4], const u64 w1[4], const u64 w2[4], const u64 w3[4], u64 digest[8]) -{ - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - u64x w0_t = swap_workaround (w0[0]); - u64x w1_t = swap_workaround (w0[1]); - u64x w2_t = swap_workaround (w0[2]); - u64x w3_t = swap_workaround (w0[3]); - u64x w4_t = swap_workaround (w1[0]); - u64x w5_t = swap_workaround (w1[1]); - u64x w6_t = swap_workaround (w1[2]); - u64x w7_t = swap_workaround (w1[3]); - u64x w8_t = swap_workaround (w2[0]); - u64x w9_t = swap_workaround (w2[1]); - u64x wa_t = swap_workaround (w2[2]); - u64x wb_t = swap_workaround (w2[3]); - u64x wc_t = swap_workaround (w3[0]); - u64x wd_t = swap_workaround (w3[1]); - u64x we_t = swap_workaround (w3[2]); - u64x wf_t = swap_workaround (w3[3]); - - #define ROUND512_EXPAND() \ - { \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - 
we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND512_STEP(i) \ - { \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \ - } - - ROUND512_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND512_EXPAND (); ROUND512_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 
0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 
0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 
0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 
0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 
0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 
0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 
0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 
0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 
0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = swap_workaround (userkey[0]); - rek[1] = swap_workaround (userkey[1]); - rek[2] = swap_workaround (userkey[2]); - rek[3] = swap_workaround (userkey[3]); - - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_encrypt (const u32 *in, u32 *out, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - u32 in_swap[4]; - - in_swap[0] = swap_workaround (in[0]); - in_swap[1] = swap_workaround (in[1]); - in_swap[2] = swap_workaround (in[2]); - in_swap[3] = swap_workaround (in[3]); - - u32 s0 = in_swap[0] ^ rek[0]; - u32 s1 = in_swap[1] ^ rek[1]; - u32 s2 = in_swap[2] ^ rek[2]; - u32 s3 = in_swap[3] ^ rek[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[ 4]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[ 5]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[ 6]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[ 7]; - s0 = s_te0[t0 >> 24] ^ 
s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[ 8]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[ 9]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[10]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[11]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[12]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[13]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[14]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[15]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[16]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[17]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[18]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[19]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[20]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[21]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[22]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[23]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[24]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[25]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[26]; - s3 = 
s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[27]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[28]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[29]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[30]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[31]; - s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ rek[32]; - s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ rek[33]; - s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ rek[34]; - s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ rek[35]; - t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ rek[36]; - t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ rek[37]; - t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ rek[38]; - t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ rek[39]; - - out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rek[40]; - - out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rek[41]; - - out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rek[42]; - - out[3] = (s_te4[(t3 >> 24) & 0xff] & 
0xff000000) - ^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rek[43]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); -} - -__device__ static void memcat8 (u32x block0[4], u32x block1[4], u32x block2[4], u32x block3[4], const u32 block_len, const u32 append[2]) -{ - switch (block_len) - { - case 0: - block0[0] = append[0]; - block0[1] = append[1]; - break; - - case 1: - block0[0] = block0[0] | append[0] << 8; - block0[1] = append[0] >> 24 | append[1] << 8; - block0[2] = append[1] >> 24; - break; - - case 2: - block0[0] = block0[0] | append[0] << 16; - block0[1] = append[0] >> 16 | append[1] << 16; - block0[2] = append[1] >> 16; - break; - - case 3: - block0[0] = block0[0] | append[0] << 24; - block0[1] = append[0] >> 8 | append[1] << 24; - block0[2] = append[1] >> 8; - break; - - case 4: - block0[1] = append[0]; - block0[2] = append[1]; - break; - - case 5: - block0[1] = block0[1] | append[0] << 8; - block0[2] = append[0] >> 24 | append[1] << 8; - block0[3] = append[1] >> 24; - break; - - case 6: - block0[1] = block0[1] | append[0] << 16; - block0[2] = append[0] >> 16 | append[1] << 16; - block0[3] = append[1] >> 16; - break; - - case 7: - block0[1] = block0[1] | append[0] << 24; - block0[2] = append[0] >> 8 | append[1] << 24; - block0[3] = append[1] >> 8; - break; - - case 8: - block0[2] = append[0]; - block0[3] = append[1]; - break; - - case 9: - block0[2] = block0[2] | append[0] << 8; - block0[3] = append[0] >> 24 | append[1] << 8; - block1[0] = append[1] >> 24; - break; - - case 10: - block0[2] = block0[2] | append[0] << 16; - block0[3] = append[0] >> 16 | append[1] << 16; - block1[0] = append[1] >> 16; - break; - - case 11: - block0[2] = block0[2] | append[0] << 24; - block0[3] = append[0] >> 8 | append[1] << 24; - block1[0] = append[1] >> 8; - break; - - case 12: - block0[3] 
= append[0]; - block1[0] = append[1]; - break; - - case 13: - block0[3] = block0[3] | append[0] << 8; - block1[0] = append[0] >> 24 | append[1] << 8; - block1[1] = append[1] >> 24; - break; - - case 14: - block0[3] = block0[3] | append[0] << 16; - block1[0] = append[0] >> 16 | append[1] << 16; - block1[1] = append[1] >> 16; - break; - - case 15: - block0[3] = block0[3] | append[0] << 24; - block1[0] = append[0] >> 8 | append[1] << 24; - block1[1] = append[1] >> 8; - break; - - case 16: - block1[0] = append[0]; - block1[1] = append[1]; - break; - - case 17: - block1[0] = block1[0] | append[0] << 8; - block1[1] = append[0] >> 24 | append[1] << 8; - block1[2] = append[1] >> 24; - break; - - case 18: - block1[0] = block1[0] | append[0] << 16; - block1[1] = append[0] >> 16 | append[1] << 16; - block1[2] = append[1] >> 16; - break; - - case 19: - block1[0] = block1[0] | append[0] << 24; - block1[1] = append[0] >> 8 | append[1] << 24; - block1[2] = append[1] >> 8; - break; - - case 20: - block1[1] = append[0]; - block1[2] = append[1]; - break; - - case 21: - block1[1] = block1[1] | append[0] << 8; - block1[2] = append[0] >> 24 | append[1] << 8; - block1[3] = append[1] >> 24; - break; - - case 22: - block1[1] = block1[1] | append[0] << 16; - block1[2] = append[0] >> 16 | append[1] << 16; - block1[3] = append[1] >> 16; - break; - - case 23: - block1[1] = block1[1] | append[0] << 24; - block1[2] = append[0] >> 8 | append[1] << 24; - block1[3] = append[1] >> 8; - break; - - case 24: - block1[2] = append[0]; - block1[3] = append[1]; - break; - - case 25: - block1[2] = block1[2] | append[0] << 8; - block1[3] = append[0] >> 24 | append[1] << 8; - block2[0] = append[1] >> 24; - break; - - case 26: - block1[2] = block1[2] | append[0] << 16; - block1[3] = append[0] >> 16 | append[1] << 16; - block2[0] = append[1] >> 16; - break; - - case 27: - block1[2] = block1[2] | append[0] << 24; - block1[3] = append[0] >> 8 | append[1] << 24; - block2[0] = append[1] >> 8; - break; - - case 28: 
- block1[3] = append[0]; - block2[0] = append[1]; - break; - - case 29: - block1[3] = block1[3] | append[0] << 8; - block2[0] = append[0] >> 24 | append[1] << 8; - block2[1] = append[1] >> 24; - break; - - case 30: - block1[3] = block1[3] | append[0] << 16; - block2[0] = append[0] >> 16 | append[1] << 16; - block2[1] = append[1] >> 16; - break; - - case 31: - block1[3] = block1[3] | append[0] << 24; - block2[0] = append[0] >> 8 | append[1] << 24; - block2[1] = append[1] >> 8; - break; - - case 32: - block2[0] = append[0]; - block2[1] = append[1]; - break; - - case 33: - block2[0] = block2[0] | append[0] << 8; - block2[1] = append[0] >> 24 | append[1] << 8; - block2[2] = append[1] >> 24; - break; - - case 34: - block2[0] = block2[0] | append[0] << 16; - block2[1] = append[0] >> 16 | append[1] << 16; - block2[2] = append[1] >> 16; - break; - - case 35: - block2[0] = block2[0] | append[0] << 24; - block2[1] = append[0] >> 8 | append[1] << 24; - block2[2] = append[1] >> 8; - break; - - case 36: - block2[1] = append[0]; - block2[2] = append[1]; - break; - - case 37: - block2[1] = block2[1] | append[0] << 8; - block2[2] = append[0] >> 24 | append[1] << 8; - block2[3] = append[1] >> 24; - break; - - case 38: - block2[1] = block2[1] | append[0] << 16; - block2[2] = append[0] >> 16 | append[1] << 16; - block2[3] = append[1] >> 16; - break; - - case 39: - block2[1] = block2[1] | append[0] << 24; - block2[2] = append[0] >> 8 | append[1] << 24; - block2[3] = append[1] >> 8; - break; - - case 40: - block2[2] = append[0]; - block2[3] = append[1]; - break; - - case 41: - block2[2] = block2[2] | append[0] << 8; - block2[3] = append[0] >> 24 | append[1] << 8; - block3[0] = append[1] >> 24; - break; - - case 42: - block2[2] = block2[2] | append[0] << 16; - block2[3] = append[0] >> 16 | append[1] << 16; - block3[0] = append[1] >> 16; - break; - - case 43: - block2[2] = block2[2] | append[0] << 24; - block2[3] = append[0] >> 8 | append[1] << 24; - block3[0] = append[1] >> 8; - break; 
- - case 44: - block2[3] = append[0]; - block3[0] = append[1]; - break; - - case 45: - block2[3] = block2[3] | append[0] << 8; - block3[0] = append[0] >> 24 | append[1] << 8; - block3[1] = append[1] >> 24; - break; - - case 46: - block2[3] = block2[3] | append[0] << 16; - block3[0] = append[0] >> 16 | append[1] << 16; - block3[1] = append[1] >> 16; - break; - - case 47: - block2[3] = block2[3] | append[0] << 24; - block3[0] = append[0] >> 8 | append[1] << 24; - block3[1] = append[1] >> 8; - break; - - case 48: - block3[0] = append[0]; - block3[1] = append[1]; - break; - - case 49: - block3[0] = block3[0] | append[0] << 8; - block3[1] = append[0] >> 24 | append[1] << 8; - block3[2] = append[1] >> 24; - break; - - case 50: - block3[0] = block3[0] | append[0] << 16; - block3[1] = append[0] >> 16 | append[1] << 16; - block3[2] = append[1] >> 16; - break; - - case 51: - block3[0] = block3[0] | append[0] << 24; - block3[1] = append[0] >> 8 | append[1] << 24; - block3[2] = append[1] >> 8; - break; - - case 52: - block3[1] = append[0]; - block3[2] = append[1]; - break; - - case 53: - block3[1] = block3[1] | append[0] << 8; - block3[2] = append[0] >> 24 | append[1] << 8; - block3[3] = append[1] >> 24; - break; - - case 54: - block3[1] = block3[1] | append[0] << 16; - block3[2] = append[0] >> 16 | append[1] << 16; - block3[3] = append[1] >> 16; - break; - - case 55: - block3[1] = block3[1] | append[0] << 24; - block3[2] = append[0] >> 8 | append[1] << 24; - block3[3] = append[1] >> 8; - break; - - case 56: - block3[2] = append[0]; - block3[3] = append[1]; - break; - } -} - -#define AESSZ 16 // AES_BLOCK_SIZE - -#define BLSZ256 32 -#define BLSZ384 48 -#define BLSZ512 64 - -#define WORDSZ256 64 -#define WORDSZ384 128 -#define WORDSZ512 128 - -#define PWMAXSZ 32 // oclHashcat password length limit -#define BLMAXSZ BLSZ512 -#define WORDMAXSZ WORDSZ512 - -#define PWMAXSZ4 (PWMAXSZ / 4) -#define BLMAXSZ4 (BLMAXSZ / 4) -#define WORDMAXSZ4 (WORDMAXSZ / 4) -#define AESSZ4 (AESSZ / 4) 
- -__device__ static void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, const u32 bl_len) -{ - const u32 bd = bl_len / 4; - - const u32 pm = pw_len % 4; - const u32 pd = pw_len / 4; - - u32 idx = 0; - - if (pm == 0) - { - for (u32 i = 0; i < pd; i++) sc[idx++] = pw[i]; - for (u32 i = 0; i < bd; i++) sc[idx++] = bl[i]; - for (u32 i = 0; i < 4; i++) sc[idx++] = sc[i]; - } - else - { - u32 pm4 = 4 - pm; - - int selector = (0x76543210 >> (pm4 * 4)) & 0xffff; - - u32 i; - - for (i = 0; i < pd; i++) sc[idx++] = pw[i]; - sc[idx++] = pw[i] - | __byte_perm ( 0, bl[0], selector); - for (i = 1; i < bd; i++) sc[idx++] = __byte_perm (bl[i - 1], bl[i], selector); - sc[idx++] = __byte_perm (bl[i - 1], sc[0], selector); - for (i = 1; i < 4; i++) sc[idx++] = __byte_perm (sc[i - 1], sc[i], selector); - sc[idx++] = __byte_perm (sc[i - 1], 0, selector); - } -} - -__device__ static void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, const u32 pwbl_len) -{ - const u32 m = offset % pwbl_len; - - const u32 om = m % 4; - const u32 od = m / 4; - - int selector = (0x76543210 >> (om * 4)) & 0xffff; - - pt[0] = __byte_perm (sc[od + 0], sc[od + 1], selector); - pt[1] = __byte_perm (sc[od + 1], sc[od + 2], selector); - pt[2] = __byte_perm (sc[od + 2], sc[od + 3], selector); - pt[3] = __byte_perm (sc[od + 3], sc[od + 4], selector); -} - -__device__ static void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, const u32 *sc, const u32 pwbl_len, u32 *iv, const u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 k = 0, wk = 0; k < W_len; k += AESSZ, wk += AESSZ4) - { - u32 pt[AESSZ4]; - - make_pt_with_offset (pt, offset + k, sc, pwbl_len); - - pt[0] ^= iv[0]; - pt[1] ^= iv[1]; - pt[2] ^= iv[2]; - pt[3] ^= iv[3]; - - AES128_encrypt (pt, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - ctx->W32[wk + 0] = iv[0]; - ctx->W32[wk + 1] = iv[1]; - ctx->W32[wk + 2] = iv[2]; - ctx->W32[wk + 3] = iv[3]; - } 
-} - -__device__ static u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - // make scratch buffer - - u32 sc[PWMAXSZ4 + BLMAXSZ4 + AESSZ4]; - - make_sc (sc, pw, pw_len, ctx->dgst32, ctx->dgst_len); - - // make sure pwbl_len is calculcated before it gets changed - - const u32 pwbl_len = pw_len + ctx->dgst_len; - - // init iv - - u32 iv[AESSZ4]; - - iv[0] = ctx->dgst32[4]; - iv[1] = ctx->dgst32[5]; - iv[2] = ctx->dgst32[6]; - iv[3] = ctx->dgst32[7]; - - // init aes - - u32 rek[60]; - - AES128_ExpandKey (ctx->dgst32, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - // first call is special as the hash depends on the result of it - // but since we do not know about the outcome at this time - // we must use the max - - make_w_with_offset (ctx, WORDMAXSZ, 0, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - // now we can find out hash to use - - u32 sum = 0; - - for (u32 i = 0; i < 4; i++) - { - sum += (ctx->W32[i] >> 24) & 0xff; - sum += (ctx->W32[i] >> 16) & 0xff; - sum += (ctx->W32[i] >> 8) & 0xff; - sum += (ctx->W32[i] >> 0) & 0xff; - } - - // init hash - - switch (sum % 3) - { - case 0: ctx->dgst32[0] = SHA256M_A; - ctx->dgst32[1] = SHA256M_B; - ctx->dgst32[2] = SHA256M_C; - ctx->dgst32[3] = SHA256M_D; - ctx->dgst32[4] = SHA256M_E; - ctx->dgst32[5] = SHA256M_F; - ctx->dgst32[6] = SHA256M_G; - ctx->dgst32[7] = SHA256M_H; - ctx->dgst_len = BLSZ256; - ctx->W_len = WORDSZ256; - sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32); - sha256_transform (&ctx->W32[16], &ctx->W32[20], &ctx->W32[24], &ctx->W32[28], ctx->dgst32); - break; - case 1: ctx->dgst64[0] = SHA384M_A; - ctx->dgst64[1] = SHA384M_B; - ctx->dgst64[2] = SHA384M_C; - ctx->dgst64[3] = SHA384M_D; - ctx->dgst64[4] = SHA384M_E; - ctx->dgst64[5] = SHA384M_F; - ctx->dgst64[6] = SHA384M_G; - ctx->dgst64[7] = SHA384M_H; - ctx->dgst_len = BLSZ384; - ctx->W_len = WORDSZ384; - 
sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - break; - case 2: ctx->dgst64[0] = SHA512M_A; - ctx->dgst64[1] = SHA512M_B; - ctx->dgst64[2] = SHA512M_C; - ctx->dgst64[3] = SHA512M_D; - ctx->dgst64[4] = SHA512M_E; - ctx->dgst64[5] = SHA512M_F; - ctx->dgst64[6] = SHA512M_G; - ctx->dgst64[7] = SHA512M_H; - ctx->dgst_len = BLSZ512; - ctx->W_len = WORDSZ512; - sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - break; - } - - // main loop - - const u32 final_len = pwbl_len * 64; - - const u32 iter_max = ctx->W_len - (ctx->W_len / 8); - - u32 offset; - u32 left; - - for (offset = WORDMAXSZ, left = final_len - offset; left >= iter_max; offset += ctx->W_len, left -= ctx->W_len) - { - make_w_with_offset (ctx, ctx->W_len, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - - switch (ctx->dgst_len) - { - case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32); - break; - case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - break; - case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - break; - } - } - - u32 ex = 0; - - if (left) - { - switch (ctx->dgst_len) - { - case BLSZ384: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - ctx->W64[ 8] = 0x80; - ctx->W64[ 9] = 0; - ctx->W64[10] = 0; - ctx->W64[11] = 0; - ctx->W64[12] = 0; - ctx->W64[13] = 0; - ctx->W64[14] = 0; - ctx->W64[15] = swap_workaround ((u64) (final_len * 8)); - ex = ctx->W64[7] >> 56; - break; - case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, rek, s_te0, s_te1, s_te2, s_te3, s_te4); - ctx->W64[ 8] = 0x80; - ctx->W64[ 9] = 0; - ctx->W64[10] = 0; - ctx->W64[11] = 0; - ctx->W64[12] = 0; - ctx->W64[13] = 0; - ctx->W64[14] = 0; - ctx->W64[15] = swap_workaround ((u64) (final_len * 8)); - ex = 
ctx->W64[7] >> 56; - break; - } - } - else - { - switch (ctx->dgst_len) - { - case BLSZ256: ex = ctx->W32[15] >> 24; - ctx->W32[ 0] = 0x80; - ctx->W32[ 1] = 0; - ctx->W32[ 2] = 0; - ctx->W32[ 3] = 0; - ctx->W32[ 4] = 0; - ctx->W32[ 5] = 0; - ctx->W32[ 6] = 0; - ctx->W32[ 7] = 0; - ctx->W32[ 8] = 0; - ctx->W32[ 9] = 0; - ctx->W32[10] = 0; - ctx->W32[11] = 0; - ctx->W32[12] = 0; - ctx->W32[13] = 0; - ctx->W32[14] = 0; - ctx->W32[15] = swap_workaround (final_len * 8); - break; - case BLSZ384: ex = ctx->W64[15] >> 56; - ctx->W64[ 0] = 0x80; - ctx->W64[ 1] = 0; - ctx->W64[ 2] = 0; - ctx->W64[ 3] = 0; - ctx->W64[ 4] = 0; - ctx->W64[ 5] = 0; - ctx->W64[ 6] = 0; - ctx->W64[ 7] = 0; - ctx->W64[ 8] = 0; - ctx->W64[ 9] = 0; - ctx->W64[10] = 0; - ctx->W64[11] = 0; - ctx->W64[12] = 0; - ctx->W64[13] = 0; - ctx->W64[14] = 0; - ctx->W64[15] = swap_workaround ((u64) (final_len * 8)); - break; - case BLSZ512: ex = ctx->W64[15] >> 56; - ctx->W64[ 0] = 0x80; - ctx->W64[ 1] = 0; - ctx->W64[ 2] = 0; - ctx->W64[ 3] = 0; - ctx->W64[ 4] = 0; - ctx->W64[ 5] = 0; - ctx->W64[ 6] = 0; - ctx->W64[ 7] = 0; - ctx->W64[ 8] = 0; - ctx->W64[ 9] = 0; - ctx->W64[10] = 0; - ctx->W64[11] = 0; - ctx->W64[12] = 0; - ctx->W64[13] = 0; - ctx->W64[14] = 0; - ctx->W64[15] = swap_workaround ((u64) (final_len * 8)); - break; - } - } - - switch (ctx->dgst_len) - { - case BLSZ256: sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32); - ctx->dgst32[ 0] = swap_workaround (ctx->dgst32[0]); - ctx->dgst32[ 1] = swap_workaround (ctx->dgst32[1]); - ctx->dgst32[ 2] = swap_workaround (ctx->dgst32[2]); - ctx->dgst32[ 3] = swap_workaround (ctx->dgst32[3]); - ctx->dgst32[ 4] = swap_workaround (ctx->dgst32[4]); - ctx->dgst32[ 5] = swap_workaround (ctx->dgst32[5]); - ctx->dgst32[ 6] = swap_workaround (ctx->dgst32[6]); - ctx->dgst32[ 7] = swap_workaround (ctx->dgst32[7]); - ctx->dgst32[ 8] = 0; - ctx->dgst32[ 9] = 0; - ctx->dgst32[10] = 0; - ctx->dgst32[11] = 0; - ctx->dgst32[12] = 0; - 
ctx->dgst32[13] = 0; - ctx->dgst32[14] = 0; - ctx->dgst32[15] = 0; - break; - case BLSZ384: sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]); - ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]); - ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]); - ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]); - ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]); - ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]); - ctx->dgst64[6] = 0; - ctx->dgst64[7] = 0; - break; - case BLSZ512: sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - ctx->dgst64[0] = swap_workaround (ctx->dgst64[0]); - ctx->dgst64[1] = swap_workaround (ctx->dgst64[1]); - ctx->dgst64[2] = swap_workaround (ctx->dgst64[2]); - ctx->dgst64[3] = swap_workaround (ctx->dgst64[3]); - ctx->dgst64[4] = swap_workaround (ctx->dgst64[4]); - ctx->dgst64[5] = swap_workaround (ctx->dgst64[5]); - ctx->dgst64[6] = swap_workaround (ctx->dgst64[6]); - ctx->dgst64[7] = swap_workaround (ctx->dgst64[7]); - break; - } - - return ex; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10700_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf17l8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = 
(blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * init - */ - - u32 block_len = pw_len; - - u32x block0[4]; - - block0[0] = w0[0]; - block0[1] = w0[1]; - block0[2] = w0[2]; - block0[3] = w0[3]; - - u32x block1[4]; - - block1[0] = 0; - block1[1] = 0; - block1[2] = 0; - block1[3] = 0; - - u32x block2[4]; - - block2[0] = 0; - block2[1] = 0; - block2[2] = 0; - block2[3] = 0; - - u32x block3[4]; - - block3[0] = 0; - block3[1] = 0; - block3[2] = 0; - block3[3] = 0; - - memcat8 (block0, block1, block2, block3, block_len, salt_buf); - - block_len += salt_len; - - append_0x80_2 (block0, block1, block_len); - - block3[3] = swap_workaround (block_len * 8); - - u32x digest[8]; - - digest[0] = SHA256M_A; - digest[1] = SHA256M_B; - digest[2] = SHA256M_C; - digest[3] = SHA256M_D; - digest[4] = SHA256M_E; - digest[5] = SHA256M_F; - digest[6] = SHA256M_G; - digest[7] = SHA256M_H; - - sha256_transform (block0, block1, block2, block3, digest); - - digest[0] = swap_workaround (digest[0]); - digest[1] = swap_workaround (digest[1]); - digest[2] = swap_workaround (digest[2]); - digest[3] = swap_workaround (digest[3]); - digest[4] = swap_workaround (digest[4]); - digest[5] = swap_workaround (digest[5]); - digest[6] = swap_workaround (digest[6]); - digest[7] = swap_workaround (digest[7]); - - tmps[gid].dgst32[0] = digest[0]; - tmps[gid].dgst32[1] = digest[1]; - tmps[gid].dgst32[2] = digest[2]; - tmps[gid].dgst32[3] = digest[3]; - tmps[gid].dgst32[4] = digest[4]; - tmps[gid].dgst32[5] = digest[5]; - tmps[gid].dgst32[6] = digest[6]; - tmps[gid].dgst32[7] = digest[7]; - tmps[gid].dgst_len = BLSZ256; - 
tmps[gid].W_len = WORDSZ256; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10700_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf17l8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - ctx_t ctx; - - ctx.dgst64[0] = tmps[gid].dgst64[0]; - ctx.dgst64[1] = tmps[gid].dgst64[1]; - ctx.dgst64[2] = tmps[gid].dgst64[2]; - ctx.dgst64[3] = tmps[gid].dgst64[3]; - ctx.dgst64[4] = tmps[gid].dgst64[4]; - ctx.dgst64[5] = tmps[gid].dgst64[5]; - ctx.dgst64[6] = tmps[gid].dgst64[6]; - ctx.dgst64[7] = tmps[gid].dgst64[7]; - ctx.dgst_len = tmps[gid].dgst_len; - ctx.W_len = tmps[gid].W_len; - - u32 ex = 0; - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - ex = 
do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4); - } - - if ((loop_pos + loop_cnt) == 64) - { - for (u32 i = 64; i < ex + 32; i++) - { - ex = do_round (w0, pw_len, &ctx, s_te0, s_te1, s_te2, s_te3, s_te4); - } - } - - tmps[gid].dgst64[0] = ctx.dgst64[0]; - tmps[gid].dgst64[1] = ctx.dgst64[1]; - tmps[gid].dgst64[2] = ctx.dgst64[2]; - tmps[gid].dgst64[3] = ctx.dgst64[3]; - tmps[gid].dgst64[4] = ctx.dgst64[4]; - tmps[gid].dgst64[5] = ctx.dgst64[5]; - tmps[gid].dgst64[6] = ctx.dgst64[6]; - tmps[gid].dgst64[7] = ctx.dgst64[7]; - tmps[gid].dgst_len = ctx.dgst_len; - tmps[gid].W_len = ctx.W_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10700_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pdf17l8_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pdf_t *pdf_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32x r0 = swap_workaround (tmps[gid].dgst32[DGST_R0]); - const u32x r1 = swap_workaround (tmps[gid].dgst32[DGST_R1]); - const u32x r2 = swap_workaround (tmps[gid].dgst32[DGST_R2]); - const u32x r3 = swap_workaround (tmps[gid].dgst32[DGST_R3]); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m10800_a0.cu b/nv/m10800_a0.cu deleted file mode 100644 
index 060e048..0000000 --- a/nv/m10800_a0.cu +++ /dev/null @@ -1,429 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA384_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 6 -#define DGST_R1 7 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -__device__ __constant__ u64 k_sha384[80] = -{ - SHA384C00, SHA384C01, SHA384C02, SHA384C03, - SHA384C04, SHA384C05, SHA384C06, SHA384C07, - SHA384C08, SHA384C09, SHA384C0a, SHA384C0b, - SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f, - SHA384C10, SHA384C11, SHA384C12, SHA384C13, - SHA384C14, SHA384C15, SHA384C16, SHA384C17, - SHA384C18, SHA384C19, SHA384C1a, SHA384C1b, - SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f, - SHA384C20, SHA384C21, SHA384C22, SHA384C23, - SHA384C24, SHA384C25, SHA384C26, SHA384C27, - SHA384C28, SHA384C29, SHA384C2a, SHA384C2b, - SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f, - SHA384C30, SHA384C31, SHA384C32, SHA384C33, - SHA384C34, SHA384C35, SHA384C36, SHA384C37, - SHA384C38, SHA384C39, SHA384C3a, SHA384C3b, - SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f, - SHA384C40, SHA384C41, SHA384C42, SHA384C43, - SHA384C44, SHA384C45, SHA384C46, SHA384C47, - SHA384C48, SHA384C49, SHA384C4a, SHA384C4b, - SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, -}; - -__device__ static void 
sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ - 
SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = 0; - digest[7] = 0; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, 
const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA384 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = 
out_len * 8; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = 
pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * SHA384 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = out_len * 8; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10800_a1.cu b/nv/m10800_a1.cu deleted file mode 100644 index 19b4e18..0000000 --- a/nv/m10800_a1.cu +++ /dev/null @@ -1,527 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA384_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 6 -#define DGST_R1 7 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S 
"check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -__device__ __constant__ u64 k_sha384[80] = -{ - SHA384C00, SHA384C01, SHA384C02, SHA384C03, - SHA384C04, SHA384C05, SHA384C06, SHA384C07, - SHA384C08, SHA384C09, SHA384C0a, SHA384C0b, - SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f, - SHA384C10, SHA384C11, SHA384C12, SHA384C13, - SHA384C14, SHA384C15, SHA384C16, SHA384C17, - SHA384C18, SHA384C19, SHA384C1a, SHA384C1b, - SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f, - SHA384C20, SHA384C21, SHA384C22, SHA384C23, - SHA384C24, SHA384C25, SHA384C26, SHA384C27, - SHA384C28, SHA384C29, SHA384C2a, SHA384C2b, - SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f, - SHA384C30, SHA384C31, SHA384C32, SHA384C33, - SHA384C34, SHA384C35, SHA384C36, SHA384C37, - SHA384C38, SHA384C39, SHA384C3a, SHA384C3b, - SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f, - SHA384C40, SHA384C41, SHA384C42, SHA384C43, - SHA384C44, SHA384C45, SHA384C46, SHA384C47, - SHA384C48, SHA384C49, SHA384C4a, SHA384C4b, - SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, -}; - -__device__ static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA384_EXPAND 
(wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, 
c, wd_t, k_sha384[i + 13]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = 0; - digest[7] = 0; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x 
wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA384 - */ - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = 
swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - 
wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - /** - * SHA384 - */ - - u32x w0_t[4]; - u32x 
w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = swap_workaround (w0[0]); - w0_t[1] = swap_workaround (w0[1]); - w0_t[2] = swap_workaround (w0[2]); - w0_t[3] = swap_workaround (w0[3]); - w1_t[0] = swap_workaround (w1[0]); - w1_t[1] = swap_workaround (w1[1]); - w1_t[2] = swap_workaround (w1[2]); - w1_t[3] = swap_workaround (w1[3]); - w2_t[0] = swap_workaround (w2[0]); - w2_t[1] = swap_workaround (w2[1]); - w2_t[2] = swap_workaround (w2[2]); - w2_t[3] = swap_workaround (w2[3]); - w3_t[0] = swap_workaround (w3[0]); - w3_t[1] = swap_workaround (w3[1]); - w3_t[2] = 0; - w3_t[3] = pw_len * 8; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s16 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m10800_a3.cu b/nv/m10800_a3.cu deleted file mode 100644 index 50ccf9e..0000000 --- a/nv/m10800_a3.cu +++ /dev/null @@ -1,538 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA384_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 6 -#define DGST_R1 7 -#define DGST_R2 4 -#define DGST_R3 5 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ __constant__ u32x c_bfs[1024]; - -__device__ __constant__ u64 k_sha384[80] = -{ - SHA384C00, SHA384C01, SHA384C02, SHA384C03, - SHA384C04, SHA384C05, SHA384C06, SHA384C07, - SHA384C08, SHA384C09, SHA384C0a, SHA384C0b, - 
SHA384C0c, SHA384C0d, SHA384C0e, SHA384C0f, - SHA384C10, SHA384C11, SHA384C12, SHA384C13, - SHA384C14, SHA384C15, SHA384C16, SHA384C17, - SHA384C18, SHA384C19, SHA384C1a, SHA384C1b, - SHA384C1c, SHA384C1d, SHA384C1e, SHA384C1f, - SHA384C20, SHA384C21, SHA384C22, SHA384C23, - SHA384C24, SHA384C25, SHA384C26, SHA384C27, - SHA384C28, SHA384C29, SHA384C2a, SHA384C2b, - SHA384C2c, SHA384C2d, SHA384C2e, SHA384C2f, - SHA384C30, SHA384C31, SHA384C32, SHA384C33, - SHA384C34, SHA384C35, SHA384C36, SHA384C37, - SHA384C38, SHA384C39, SHA384C3a, SHA384C3b, - SHA384C3c, SHA384C3d, SHA384C3e, SHA384C3f, - SHA384C40, SHA384C41, SHA384C42, SHA384C43, - SHA384C44, SHA384C45, SHA384C46, SHA384C47, - SHA384C48, SHA384C49, SHA384C4a, SHA384C4b, - SHA384C4c, SHA384C4d, SHA384C4e, SHA384C4f, -}; - -__device__ static void sha384_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u64x digest[8]) -{ - u64x w0_t = hl32_to_64 (w0[0], w0[1]); - u64x w1_t = hl32_to_64 (w0[2], w0[3]); - u64x w2_t = hl32_to_64 (w1[0], w1[1]); - u64x w3_t = hl32_to_64 (w1[2], w1[3]); - u64x w4_t = hl32_to_64 (w2[0], w2[1]); - u64x w5_t = hl32_to_64 (w2[2], w2[3]); - u64x w6_t = hl32_to_64 (w3[0], w3[1]); - u64x w7_t = 0; - u64x w8_t = 0; - u64x w9_t = 0; - u64x wa_t = 0; - u64x wb_t = 0; - u64x wc_t = 0; - u64x wd_t = 0; - u64x we_t = 0; - u64x wf_t = hl32_to_64 (w3[2], w3[3]); - - u64x a = digest[0]; - u64x b = digest[1]; - u64x c = digest[2]; - u64x d = digest[3]; - u64x e = digest[4]; - u64x f = digest[5]; - u64x g = digest[6]; - u64x h = digest[7]; - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA384_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA384_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA384_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA384_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA384_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA384_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA384_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA384_EXPAND (w5_t, w0_t, w8_t, 
w7_t); \ - w8_t = SHA384_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA384_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA384_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA384_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA384_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA384_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA384_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA384_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha384[i + 0]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha384[i + 1]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha384[i + 2]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha384[i + 3]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha384[i + 4]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha384[i + 5]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha384[i + 6]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha384[i + 7]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha384[i + 8]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha384[i + 9]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha384[i + 10]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha384[i + 11]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha384[i + 12]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha384[i + 13]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, c, d, e, f, g, h, a, b, we_t, k_sha384[i + 14]); \ - SHA384_STEP (SHA384_F0o, SHA384_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha384[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - /* 
rev - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; - */ - - digest[0] = a; - digest[1] = b; - digest[2] = c; - digest[3] = d; - digest[4] = e; - digest[5] = f; - digest[6] = 0; - digest[7] = 0; -} - -__device__ static void m10800m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = 
SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m10800s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = w0; - w0_t[1] = w[ 1]; - w0_t[2] = w[ 2]; - w0_t[3] = w[ 3]; - w1_t[0] = w[ 4]; - w1_t[1] = w[ 5]; - w1_t[2] = w[ 6]; - w1_t[3] = w[ 7]; - w2_t[0] = w[ 8]; - w2_t[1] = w[ 9]; - w2_t[2] = w[10]; - w2_t[3] = w[11]; - w3_t[0] = w[12]; - w3_t[1] = w[13]; - 
w3_t[2] = w[14]; - w3_t[3] = w[15]; - - u64x digest[8]; - - digest[0] = SHA384M_A; - digest[1] = SHA384M_B; - digest[2] = SHA384M_C; - digest[3] = SHA384M_D; - digest[4] = SHA384M_E; - digest[5] = SHA384M_F; - digest[6] = SHA384M_G; - digest[7] = SHA384M_H; - - sha384_transform (w0_t, w1_t, w2_t, w3_t, digest); - - const u32x r0 = l32_from_64 (digest[3]); - const u32x r1 = h32_from_64 (digest[3]); - const u32x r2 = l32_from_64 (digest[2]); - const u32x r3 = h32_from_64 (digest[2]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, 
bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, 
bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s04 (const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, 
const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m10800s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m10900.cu b/nv/m10900.cu deleted file mode 100644 index 9fbd78d..0000000 --- a/nv/m10900.cu +++ /dev/null @@ -1,480 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PBKDF2_SHA256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x 
w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t 
= SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, 
SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + 
w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, 
a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; 
- opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10900_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) 
return; - - u32x w0[4]; - - w0[0] = swap_workaround (pws[gid].i[ 0]); - w0[1] = swap_workaround (pws[gid].i[ 1]); - w0[2] = swap_workaround (pws[gid].i[ 2]); - w0[3] = swap_workaround (pws[gid].i[ 3]); - - u32x w1[4]; - - w1[0] = swap_workaround (pws[gid].i[ 4]); - w1[1] = swap_workaround (pws[gid].i[ 5]); - w1[2] = swap_workaround (pws[gid].i[ 6]); - w1[3] = swap_workaround (pws[gid].i[ 7]); - - u32x w2[4]; - - w2[0] = swap_workaround (pws[gid].i[ 8]); - w2[1] = swap_workaround (pws[gid].i[ 9]); - w2[2] = swap_workaround (pws[gid].i[10]); - w2[3] = swap_workaround (pws[gid].i[11]); - - u32x w3[4]; - - w3[0] = swap_workaround (pws[gid].i[12]); - w3[1] = swap_workaround (pws[gid].i[13]); - w3[2] = swap_workaround (pws[gid].i[14]); - w3[3] = swap_workaround (pws[gid].i[15]); - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 esalt_buf0[4]; - u32 esalt_buf1[4]; - u32 esalt_buf2[4]; - u32 esalt_buf3[4]; - - esalt_buf0[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]); - esalt_buf0[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - esalt_buf0[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]); - esalt_buf0[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - esalt_buf1[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]); - esalt_buf1[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - esalt_buf1[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]); - esalt_buf1[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - esalt_buf2[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]); - esalt_buf2[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - esalt_buf2[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]); - esalt_buf2[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - esalt_buf3[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]); - esalt_buf3[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - esalt_buf3[2] = 0; - esalt_buf3[3] = (64 + salt_len + 
4) * 8; - - u32 ipad[8]; - u32 opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - u32 dgst[8]; - - hmac_sha256_run (esalt_buf0, esalt_buf1, esalt_buf2, esalt_buf3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10900_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u32 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < 8; i += 8) - { - u32 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u32 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= 
dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m10900_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m11000_a0.cu b/nv/m11000_a0.cu deleted file mode 100644 index 49e3bca..0000000 --- a/nv/m11000_a0.cu +++ /dev/null @@ -1,713 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - 
-#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x 
pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - append_0x80_4 (w0, w1, w2, w3, out_len); - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = 
salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, 
w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP 
(MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, 
w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const 
u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - 
salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - append_0x80_4 (w0, w1, w2, w3, out_len); - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, 
w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, 
MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - 
w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], 
MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const 
u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11000_a1.cu b/nv/m11000_a1.cu deleted file mode 100644 index e04e7b1..0000000 --- a/nv/m11000_a1.cu +++ /dev/null @@ -1,805 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include 
"include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = 
pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - 
switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, 
MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP 
(MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, 
w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, 
MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, 
const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - u32 wordr1[4]; - u32 wordr2[4]; - u32 wordr3[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = 
salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += 
MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, 
w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ -} diff --git a/nv/m11000_a3.cu b/nv/m11000_a3.cu deleted file mode 100644 index 9bde0a4..0000000 --- a/nv/m11000_a3.cu +++ /dev/null @@ -1,889 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m11000m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf0[4]; - - 
salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, 
MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP 
(MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, 
w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, 
MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m11000s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = salt_bufs[salt_pos].salt_buf[ 8]; - salt_buf2[1] = salt_bufs[salt_pos].salt_buf[ 9]; - salt_buf2[2] = salt_bufs[salt_pos].salt_buf[10]; - salt_buf2[3] = salt_bufs[salt_pos].salt_buf[11]; - - u32 salt_buf3[2]; - - salt_buf3[0] = salt_bufs[salt_pos].salt_buf[12]; - salt_buf3[1] = salt_bufs[salt_pos].salt_buf[13]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - const u32 pw_salt_len = pw_len + salt_len; - - 
/** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * prepend salt - */ - - // first step fixed 56 bytes of salt - - u32x w0_t[4]; - u32x w1_t[4]; - u32x w2_t[4]; - u32x w3_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - w2_t[0] = salt_buf2[0]; - w2_t[1] = salt_buf2[1]; - w2_t[2] = salt_buf2[2]; - w2_t[3] = salt_buf2[3]; - w3_t[0] = salt_buf3[0]; - w3_t[1] = salt_buf3[1]; - - // after 56 byte salt, we have beginning of the password - - w3_t[2] = w0[0]; - w3_t[3] = w0[1]; - - /** - * md5 - */ - - // first transform - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP 
(MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, 
w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = w0[2]; - w0_t[1] = w0[3]; - w0_t[2] = w1[0]; - w0_t[3] = w1[1]; - w1_t[0] = w1[2]; - w1_t[1] = w1[3]; - w1_t[2] = w2[0]; - w1_t[3] = w2[1]; - w2_t[0] = w2[2]; - w2_t[1] = w2[3]; - w2_t[2] = w3[0]; - w2_t[3] = w3[1]; - w3_t[0] = w3[2]; - w3_t[1] = w3[3]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - 
- MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, 
d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - 
w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x 
w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; 
- - /** - * main - */ - - m11000m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, 
bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11000_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11000s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11100_a0.cu 
b/nv/m11100_a0.cu deleted file mode 100644 index 75029e2..0000000 --- a/nv/m11100_a0.cu +++ /dev/null @@ -1,805 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] 
= 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /* - * append the salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - - w2_t[0] |= w2[0]; - w2_t[1] |= w2[1]; - w2_t[2] |= w2[2]; - w2_t[3] |= w2[3]; - - w3_t[0] |= w3[0]; - w3_t[1] |= w3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], 
MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; 
- c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, 
MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t 
*digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; 
il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /* - * append the salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - const u32 pw_salt_len = out_len + salt_len; - - w0_t[0] |= w0[0]; - w0_t[1] |= w0[1]; - w0_t[2] |= w0[2]; - w0_t[3] |= w0[3]; - - w1_t[0] |= w1[0]; - w1_t[1] |= w1[1]; - w1_t[2] |= w1[2]; - w1_t[3] |= w1[3]; - - w2_t[0] |= w2[0]; - w2_t[1] |= w2[1]; - w2_t[2] |= w2[2]; - w2_t[3] |= w2[3]; - - w3_t[0] |= w3[0]; - w3_t[1] |= w3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], 
MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - 
MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | 
uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], 
MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - 
MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11100_a1.cu b/nv/m11100_a1.cu deleted file mode 100644 index c28630e..0000000 --- a/nv/m11100_a1.cu +++ /dev/null @@ -1,855 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; 
- wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /* - * append the salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - w0_t[0] |= wordl0[0] | wordr0[0]; - w0_t[1] |= wordl0[1] | wordr0[1]; - w0_t[2] |= wordl0[2] | wordr0[2]; - w0_t[3] |= wordl0[3] | wordr0[3]; - - w1_t[0] |= wordl1[0] | wordr1[0]; - w1_t[1] |= wordl1[1] | wordr1[1]; - w1_t[2] |= wordl1[2] | wordr1[2]; - w1_t[3] |= wordl1[3] | wordr1[3]; - - w2_t[0] |= wordl2[0] | wordr2[0]; - w2_t[1] |= wordl2[1] | wordr2[1]; - w2_t[2] |= wordl2[2] | wordr2[2]; - w2_t[3] |= wordl2[3] | wordr2[3]; - - w3_t[0] |= wordl3[0] | wordr3[0]; - w3_t[1] |= wordl3[0] | wordr3[0]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, 
a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, 
MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = 
uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], 
MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 
bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // 
not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0_t[4]; - - w0_t[0] = salt_buf0[0]; - w0_t[1] = salt_buf0[1]; - w0_t[2] = salt_buf0[2]; - w0_t[3] = salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = salt_buf1[0]; - w1_t[1] = salt_buf1[1]; - w1_t[2] = salt_buf1[2]; - w1_t[3] = salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = 0; - w2_t[1] = 0; - 
w2_t[2] = 0; - w2_t[3] = 0; - - u32x w3_t[4]; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = 0; - w3_t[3] = 0; - - /* - * append the salt - */ - - switch_buffer_by_offset (w0_t, w1_t, w2_t, w3_t, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - w0_t[0] |= wordl0[0] | wordr0[0]; - w0_t[1] |= wordl0[1] | wordr0[1]; - w0_t[2] |= wordl0[2] | wordr0[2]; - w0_t[3] |= wordl0[3] | wordr0[3]; - - w1_t[0] |= wordl1[0] | wordr1[0]; - w1_t[1] |= wordl1[1] | wordr1[1]; - w1_t[2] |= wordl1[2] | wordr1[2]; - w1_t[3] |= wordl1[3] | wordr1[3]; - - w2_t[0] |= wordl2[0] | wordr2[0]; - w2_t[1] |= wordl2[1] | wordr2[1]; - w2_t[2] |= wordl2[2] | wordr2[2]; - w2_t[3] |= wordl2[3] | wordr2[3]; - - w3_t[0] |= wordl3[0] | wordr3[0]; - w3_t[1] |= wordl3[0] | wordr3[0]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, 
w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP 
(MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, 
w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const 
u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11100_a3.cu b/nv/m11100_a3.cu deleted file mode 100644 index b30db2c..0000000 --- a/nv/m11100_a3.cu +++ /dev/null @@ -1,1027 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" 
-#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m11100m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 
2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0] | salt_buf0[0]; - w0_t[1] = w0[1] | salt_buf0[1]; - w0_t[2] = w0[2] | salt_buf0[2]; - w0_t[3] = w0[3] | salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0] | salt_buf1[0]; - w1_t[1] = w1[1] | salt_buf1[1]; - w1_t[2] = w1[2] | salt_buf1[2]; - w1_t[3] = w1[3] | salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0] | salt_buf2[0]; - w2_t[1] = w2[1] | salt_buf2[1]; - w2_t[2] = w2[2] | salt_buf2[2]; - w2_t[3] = w2[3] | salt_buf2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0] | salt_buf3[0]; - w3_t[1] = w3[1] | salt_buf3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, 
MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP 
(MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - 
w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); 
- MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, 
b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m11100s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * challenge - */ - - u32 challenge; - - challenge = salt_bufs[salt_pos].salt_buf[0]; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[ 1]; // not a bug - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[ 4]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[ 5]; - salt_buf1[1] = 
salt_bufs[salt_pos].salt_buf[ 6]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[ 7]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[ 8]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - const u32 salt_len = salt_bufs[salt_pos].salt_len - 4; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - const u32 pw_salt_len = pw_len + salt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - u32x w0_t[4]; - - w0_t[0] = w0[0] | salt_buf0[0]; - w0_t[1] = w0[1] | salt_buf0[1]; - w0_t[2] = w0[2] | salt_buf0[2]; - w0_t[3] = w0[3] | salt_buf0[3]; - - u32x w1_t[4]; - - w1_t[0] = w1[0] | salt_buf1[0]; - w1_t[1] = w1[1] | salt_buf1[1]; - w1_t[2] = w1[2] | salt_buf1[2]; - w1_t[3] = w1[3] | salt_buf1[3]; - - u32x w2_t[4]; - - w2_t[0] = w2[0] | salt_buf2[0]; - w2_t[1] = w2[1] | salt_buf2[1]; - w2_t[2] = w2[2] | salt_buf2[2]; - w2_t[3] = w2[3] | salt_buf2[3]; - - u32x w3_t[4]; - - w3_t[0] = w3[0] | salt_buf3[0]; - w3_t[1] = w3[1] | salt_buf3[1]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - /* - * md5 ($pass.$salt) - */ - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP 
(MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, 
w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 
16; - - // add the 4 byte challenge here - - w2_t[0] = challenge; - w2_t[1] = 0x00000080; - w2_t[2] = 0; - w2_t[3] = 0; - - w3_t[0] = 0; - w3_t[1] = 0; - w3_t[2] = (32 + 4) * 8; - w3_t[3] = 0; - - /** - * md5 ($hash.$challenge) - */ - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, 
a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H , a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H , a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H , d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H , c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H , b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], 
MD5C3f, MD5S33); - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, 
d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, 
bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11100_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) 
return; - - /** - * main - */ - - m11100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11200_a0.cu b/nv/m11200_a0.cu deleted file mode 100644 index 3a59118..0000000 --- a/nv/m11200_a0.cu +++ /dev/null @@ -1,1043 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, 
u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 ($pass) - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = 
swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ 
w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ 
wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ 
w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); 
- SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, 
d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, 
c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, 
c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 
((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t 
^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 
((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ 
w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const 
u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround 
(salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 ($pass) - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, 
e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ 
w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ 
wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ 
we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t 
^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 
1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ 
wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . 
sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t 
= rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = 
rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t 
= rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11200_a1.cu b/nv/m11200_a1.cu deleted file mode 100644 index c373e21..0000000 --- a/nv/m11200_a1.cu +++ /dev/null @@ -1,1153 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" 
-#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - 
salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 ($pass) - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = 
swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); 
SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); 
SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 
1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP 
(SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, 
w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, 
w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t 
^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - 
c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m11200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround 
(salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 ($pass) - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = 
swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 
((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t 
^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 
((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, 
b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP 
(SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP 
(SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP 
(SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, 
w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - 
w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, 
wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - 
a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11200_a3.cu 
b/nv/m11200_a3.cu deleted file mode 100644 index c6953c4..0000000 --- a/nv/m11200_a3.cu +++ /dev/null @@ -1,1233 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 4 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m11200m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround 
(salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 ($pass) - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t 
^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - 
w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, 
wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = 
plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t 
^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ 
we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t 
^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . 
sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t 
= rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = 
rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t 
= rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -__device__ static void m11200s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - u32 salt_buf[5]; - - salt_buf[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf[3] = swap_workaround (salt_bufs[salt_pos].salt_buf[3]); - salt_buf[4] = swap_workaround (salt_bufs[salt_pos].salt_buf[4]); - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 ($pass) - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP 
(SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, 
d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, 
b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, 
b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - u32x plain_sha1_a = a + SHA1M_A; - u32x plain_sha1_b = b + SHA1M_B; - u32x plain_sha1_c = c + SHA1M_C; - u32x plain_sha1_d = d + SHA1M_D; - u32x plain_sha1_e = e + SHA1M_E; - - /** - * sha1 (sha1 ($pass)) - */ - - w0_t = plain_sha1_a; - w1_t = plain_sha1_b; - w2_t = plain_sha1_c; - w3_t = plain_sha1_d; - w4_t = plain_sha1_e; - - w5_t = 0x80000000; - w6_t = 0; - w7_t = 0; - w8_t = 0; - w9_t = 0; - wa_t = 0; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 20 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ 
w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - 
c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha1 ($salt . sha1 (sha1 ($pass))) - */ - - w0_t = salt_buf[0]; - w1_t = salt_buf[1]; - w2_t = salt_buf[2]; - w3_t = salt_buf[3]; - w4_t = salt_buf[4]; - w5_t = a; - w6_t = b; - w7_t = c; - w8_t = d; - w9_t = e; - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = 40 * 8; - - a = SHA1M_A; - b = SHA1M_B; - c = SHA1M_C; - d = SHA1M_D; - e = SHA1M_E; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ 
w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ 
wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ 
wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - a ^= plain_sha1_a; - b ^= plain_sha1_b; - c ^= plain_sha1_c; - d ^= plain_sha1_d; - e ^= plain_sha1_e; - - const u32x r0 = d; - const u32x r1 = e; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 
*bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 
bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 
digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * 
modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11200_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * modifier - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - 
u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11200s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11300.cu b/nv/m11300.cu deleted file mode 100644 index 4691c47..0000000 --- a/nv/m11300.cu +++ /dev/null @@ -1,1356 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, 
SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 
8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 
0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 
0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 
0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 
0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 
0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 
0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 
0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 
0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 
0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 
0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 
0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 
0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 
0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 
0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 
0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 
0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 
0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - u32 run = 1; - - while (run) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) - { - run = 0; - continue; - } - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - 
s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] 
^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 
>> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 
0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11300_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bitcoin_wallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const bitcoin_wallet_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 
bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_buf0[4]; - - salt_buf0[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf0[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf0[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf0[3] = salt_bufs[salt_pos].salt_buf[3]; - - u32 salt_buf1[4]; - - salt_buf1[0] = salt_bufs[salt_pos].salt_buf[4]; - salt_buf1[1] = salt_bufs[salt_pos].salt_buf[5]; - salt_buf1[2] = salt_bufs[salt_pos].salt_buf[6]; - salt_buf1[3] = salt_bufs[salt_pos].salt_buf[7]; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = 0; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - switch_buffer_by_offset (salt_buf0, salt_buf1, salt_buf2, salt_buf3, pw_len); - - w0[0] |= salt_buf0[0]; - w0[1] |= salt_buf0[1]; - w0[2] |= salt_buf0[2]; - w0[3] |= salt_buf0[3]; - - w1[0] |= salt_buf1[0]; - w1[1] |= salt_buf1[1]; - w1[2] |= salt_buf1[2]; - w1[3] |= salt_buf1[3]; - - w2[0] |= salt_buf2[0]; - w2[1] |= salt_buf2[1]; - w2[2] |= salt_buf2[2]; - w2[3] |= salt_buf2[3]; - - w3[0] |= salt_buf3[0]; - w3[1] |= salt_buf3[1]; - w3[2] |= salt_buf3[2]; - w3[3] 
|= salt_buf3[3]; - - const u32 block_len = pw_len + salt_len; - - /** - * init - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u64x w[16]; - - w[ 0] = hl32_to_64 (w0[0], w0[1]); - w[ 1] = hl32_to_64 (w0[2], w0[3]); - w[ 2] = hl32_to_64 (w1[0], w1[1]); - w[ 3] = hl32_to_64 (w1[2], w1[3]); - w[ 4] = hl32_to_64 (w2[0], w2[1]); - w[ 5] = hl32_to_64 (w2[2], w2[3]); - w[ 6] = hl32_to_64 (w3[0], w3[1]); - w[ 7] = hl32_to_64 (w3[2], w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = block_len * 8; - - u64x dgst[8]; - - dgst[0] = SHA512M_A; - dgst[1] = SHA512M_B; - dgst[2] = SHA512M_C; - dgst[3] = SHA512M_D; - dgst[4] = SHA512M_E; - dgst[5] = SHA512M_F; - dgst[6] = SHA512M_G; - dgst[7] = SHA512M_H; - - sha512_transform (w, dgst); - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11300_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bitcoin_wallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, 
plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const bitcoin_wallet_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - - if (gid >= gid_max) return; - - u64x dgst[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - u64x w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 64 * 8; - - for (u32 i = 0; i < loop_cnt; i++) - { - w[0] = dgst[0]; - w[1] = dgst[1]; - w[2] = dgst[2]; - w[3] = dgst[3]; - w[4] = dgst[4]; - w[5] = dgst[5]; - w[6] = dgst[6]; - w[7] = dgst[7]; - - dgst[0] = SHA512M_A; - dgst[1] = SHA512M_B; - dgst[2] = SHA512M_C; - dgst[3] = SHA512M_D; - dgst[4] = SHA512M_E; - dgst[5] = SHA512M_F; - dgst[6] = SHA512M_G; - dgst[7] = SHA512M_H; - - sha512_transform (w, dgst); - } - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11300_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bitcoin_wallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const bitcoin_wallet_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * real code - */ - - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - u32x key[8]; - - key[0] = h32_from_64 (dgst[0]); - key[1] = l32_from_64 (dgst[0]); - key[2] = h32_from_64 (dgst[1]); - key[3] = l32_from_64 (dgst[1]); - key[4] = h32_from_64 (dgst[2]); - key[5] = l32_from_64 (dgst[2]); - key[6] = h32_from_64 (dgst[3]); - key[7] = l32_from_64 (dgst[3]); - - u32x iv[4]; - - iv[0] = h32_from_64 (dgst[4]); - iv[1] = l32_from_64 
(dgst[4]); - iv[2] = h32_from_64 (dgst[5]); - iv[3] = l32_from_64 (dgst[5]); - - #define KEYLEN 60 - - u32 rk[KEYLEN]; - - AES256_ExpandKey (key, rk, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32x out[4]; - - for (u32 i = 0; i < esalt_bufs[salt_pos].cry_master_len; i += 16) - { - u32x data[4]; - - data[0] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 0]); - data[1] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 1]); - data[2] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 2]); - data[3] = swap_workaround (esalt_bufs[salt_pos].cry_master_buf[(i / 4) + 3]); - - AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= iv[0]; - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - } - - if ((out[0] == 0x10101010) - && (out[1] == 0x10101010) - && (out[2] == 0x10101010) - && (out[3] == 0x10101010)) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); - - d_return_buf[lid] = 1; - } -} diff --git a/nv/m11400_a0.cu b/nv/m11400_a0.cu deleted file mode 100644 index 8ffb01f..0000000 --- a/nv/m11400_a0.cu +++ /dev/null @@ -1,2322 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) -{ - const u32 mod = block_len & 3; - const u32 div = block_len / 4; - - const int offset_minus_4 = 4 - mod; - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - u32x append0_t[4]; - - append0_t[0] = __byte_perm ( 0, append0[0], selector); - append0_t[1] = __byte_perm (append0[0], append0[1], selector); - append0_t[2] = __byte_perm (append0[1], append0[2], selector); - append0_t[3] = __byte_perm (append0[2], append0[3], selector); - - u32x append1_t[4]; - - append1_t[0] = __byte_perm (append0[3], append1[0], selector); - append1_t[1] = __byte_perm (append1[0], append1[1], selector); - append1_t[2] = __byte_perm (append1[1], append1[2], selector); - append1_t[3] = __byte_perm (append1[2], append1[3], selector); - - u32x append2_t[4]; - - append2_t[0] = __byte_perm (append1[3], append2[0], selector); - append2_t[1] = __byte_perm (append2[0], append2[1], selector); - append2_t[2] = __byte_perm (append2[1], append2[2], selector); - append2_t[3] = __byte_perm (append2[2], append2[3], selector); - - u32x append3_t[4]; - - append3_t[0] = __byte_perm (append2[3], append3[0], selector); - append3_t[1] = __byte_perm (append3[0], append3[1], selector); - append3_t[2] = __byte_perm (append3[1], append3[2], selector); - append3_t[3] = __byte_perm (append3[2], 
append3[3], selector); - - u32x append4_t[4]; - - append4_t[0] = __byte_perm (append3[3], 0, selector); - append4_t[1] = 0; - append4_t[2] = 0; - append4_t[3] = 0; - - switch (div) - { - case 0: block0[ 0] |= append0_t[0]; - block0[ 1] = append0_t[1]; - block0[ 2] = append0_t[2]; - block0[ 3] = append0_t[3]; - - block0[ 4] = append1_t[0]; - block0[ 5] = append1_t[1]; - block0[ 6] = append1_t[2]; - block0[ 7] = append1_t[3]; - - block0[ 8] = append2_t[0]; - block0[ 9] = append2_t[1]; - block0[10] = append2_t[2]; - block0[11] = append2_t[3]; - - block0[12] = append3_t[0]; - block0[13] = append3_t[1]; - block0[14] = append3_t[2]; - block0[15] = append3_t[3]; - - block1[ 0] = append4_t[0]; - block1[ 1] = append4_t[1]; - block1[ 2] = append4_t[2]; - block1[ 3] = append4_t[3]; - break; - - case 1: block0[ 1] |= append0_t[0]; - block0[ 2] = append0_t[1]; - block0[ 3] = append0_t[2]; - block0[ 4] = append0_t[3]; - - block0[ 5] = append1_t[0]; - block0[ 6] = append1_t[1]; - block0[ 7] = append1_t[2]; - block0[ 8] = append1_t[3]; - - block0[ 9] = append2_t[0]; - block0[10] = append2_t[1]; - block0[11] = append2_t[2]; - block0[12] = append2_t[3]; - - block0[13] = append3_t[0]; - block0[14] = append3_t[1]; - block0[15] = append3_t[2]; - block1[ 0] = append3_t[3]; - - block1[ 1] = append4_t[0]; - block1[ 2] = append4_t[1]; - block1[ 3] = append4_t[2]; - block1[ 4] = append4_t[3]; - break; - - case 2: block0[ 2] |= append0_t[0]; - block0[ 3] = append0_t[1]; - block0[ 4] = append0_t[2]; - block0[ 5] = append0_t[3]; - - block0[ 6] = append1_t[0]; - block0[ 7] = append1_t[1]; - block0[ 8] = append1_t[2]; - block0[ 9] = append1_t[3]; - - block0[10] = append2_t[0]; - block0[11] = append2_t[1]; - block0[12] = append2_t[2]; - block0[13] = append2_t[3]; - - block0[14] = append3_t[0]; - block0[15] = append3_t[1]; - block1[ 0] = append3_t[2]; - block1[ 1] = append3_t[3]; - - block1[ 2] = append4_t[0]; - block1[ 3] = append4_t[1]; - block1[ 4] = append4_t[2]; - block1[ 5] = append4_t[3]; - 
break; - - case 3: block0[ 3] |= append0_t[0]; - block0[ 4] = append0_t[1]; - block0[ 5] = append0_t[2]; - block0[ 6] = append0_t[3]; - - block0[ 7] = append1_t[0]; - block0[ 8] = append1_t[1]; - block0[ 9] = append1_t[2]; - block0[10] = append1_t[3]; - - block0[11] = append2_t[0]; - block0[12] = append2_t[1]; - block0[13] = append2_t[2]; - block0[14] = append2_t[3]; - - block0[15] = append3_t[0]; - block1[ 0] = append3_t[1]; - block1[ 1] = append3_t[2]; - block1[ 2] = append3_t[3]; - - block1[ 3] = append4_t[0]; - block1[ 4] = append4_t[1]; - block1[ 5] = append4_t[2]; - block1[ 6] = append4_t[3]; - break; - - case 4: block0[ 4] |= append0_t[0]; - block0[ 5] = append0_t[1]; - block0[ 6] = append0_t[2]; - block0[ 7] = append0_t[3]; - - block0[ 8] = append1_t[0]; - block0[ 9] = append1_t[1]; - block0[10] = append1_t[2]; - block0[11] = append1_t[3]; - - block0[12] = append2_t[0]; - block0[13] = append2_t[1]; - block0[14] = append2_t[2]; - block0[15] = append2_t[3]; - - block1[ 0] = append3_t[0]; - block1[ 1] = append3_t[1]; - block1[ 2] = append3_t[2]; - block1[ 3] = append3_t[3]; - - block1[ 4] = append4_t[0]; - block1[ 5] = append4_t[1]; - block1[ 6] = append4_t[2]; - block1[ 7] = append4_t[3]; - break; - - case 5: block0[ 5] |= append0_t[0]; - block0[ 6] = append0_t[1]; - block0[ 7] = append0_t[2]; - block0[ 8] = append0_t[3]; - - block0[ 9] = append1_t[0]; - block0[10] = append1_t[1]; - block0[11] = append1_t[2]; - block0[12] = append1_t[3]; - - block0[13] = append2_t[0]; - block0[14] = append2_t[1]; - block0[15] = append2_t[2]; - block1[ 0] = append2_t[3]; - - block1[ 1] = append3_t[0]; - block1[ 2] = append3_t[1]; - block1[ 3] = append3_t[2]; - block1[ 4] = append3_t[3]; - - block1[ 5] = append4_t[0]; - block1[ 6] = append4_t[1]; - block1[ 7] = append4_t[2]; - block1[ 8] = append4_t[3]; - break; - - case 6: block0[ 6] |= append0_t[0]; - block0[ 7] = append0_t[1]; - block0[ 8] = append0_t[2]; - block0[ 9] = append0_t[3]; - - block0[10] = append1_t[0]; - 
block0[11] = append1_t[1]; - block0[12] = append1_t[2]; - block0[13] = append1_t[3]; - - block0[14] = append2_t[0]; - block0[15] = append2_t[1]; - block1[ 0] = append2_t[2]; - block1[ 1] = append2_t[3]; - - block1[ 2] = append3_t[0]; - block1[ 3] = append3_t[1]; - block1[ 4] = append3_t[2]; - block1[ 5] = append3_t[3]; - - block1[ 6] = append4_t[0]; - block1[ 7] = append4_t[1]; - block1[ 8] = append4_t[2]; - block1[ 9] = append4_t[3]; - break; - - case 7: block0[ 7] |= append0_t[0]; - block0[ 8] = append0_t[1]; - block0[ 9] = append0_t[2]; - block0[10] = append0_t[3]; - - block0[11] = append1_t[0]; - block0[12] = append1_t[1]; - block0[13] = append1_t[2]; - block0[14] = append1_t[3]; - - block0[15] = append2_t[0]; - block1[ 0] = append2_t[1]; - block1[ 1] = append2_t[2]; - block1[ 2] = append2_t[3]; - - block1[ 3] = append3_t[0]; - block1[ 4] = append3_t[1]; - block1[ 5] = append3_t[2]; - block1[ 6] = append3_t[3]; - - block1[ 7] = append4_t[0]; - block1[ 8] = append4_t[1]; - block1[ 9] = append4_t[2]; - block1[10] = append4_t[3]; - break; - - case 8: block0[ 8] |= append0_t[0]; - block0[ 9] = append0_t[1]; - block0[10] = append0_t[2]; - block0[11] = append0_t[3]; - - block0[12] = append1_t[0]; - block0[13] = append1_t[1]; - block0[14] = append1_t[2]; - block0[15] = append1_t[3]; - - block1[ 0] = append2_t[0]; - block1[ 1] = append2_t[1]; - block1[ 2] = append2_t[2]; - block1[ 3] = append2_t[3]; - - block1[ 4] = append3_t[0]; - block1[ 5] = append3_t[1]; - block1[ 6] = append3_t[2]; - block1[ 7] = append3_t[3]; - - block1[ 8] = append4_t[0]; - block1[ 9] = append4_t[1]; - block1[10] = append4_t[2]; - block1[11] = append4_t[3]; - break; - - case 9: block0[ 9] |= append0_t[0]; - block0[10] = append0_t[1]; - block0[11] = append0_t[2]; - block0[12] = append0_t[3]; - - block0[13] = append1_t[0]; - block0[14] = append1_t[1]; - block0[15] = append1_t[2]; - block1[ 0] = append1_t[3]; - - block1[ 1] = append2_t[0]; - block1[ 2] = append2_t[1]; - block1[ 3] = append2_t[2]; - 
block1[ 4] = append2_t[3]; - - block1[ 5] = append3_t[0]; - block1[ 6] = append3_t[1]; - block1[ 7] = append3_t[2]; - block1[ 8] = append3_t[3]; - - block1[ 9] = append4_t[0]; - block1[10] = append4_t[1]; - block1[11] = append4_t[2]; - block1[12] = append4_t[3]; - break; - - case 10: block0[10] |= append0_t[0]; - block0[11] = append0_t[1]; - block0[12] = append0_t[2]; - block0[13] = append0_t[3]; - - block0[14] = append1_t[0]; - block0[15] = append1_t[1]; - block1[ 0] = append1_t[2]; - block1[ 1] = append1_t[3]; - - block1[ 2] = append2_t[0]; - block1[ 3] = append2_t[1]; - block1[ 4] = append2_t[2]; - block1[ 5] = append2_t[3]; - - block1[ 6] = append3_t[0]; - block1[ 7] = append3_t[1]; - block1[ 8] = append3_t[2]; - block1[ 9] = append3_t[3]; - - block1[10] = append4_t[0]; - block1[11] = append4_t[1]; - block1[12] = append4_t[2]; - block1[13] = append4_t[3]; - break; - - case 11: block0[11] |= append0_t[0]; - block0[12] = append0_t[1]; - block0[13] = append0_t[2]; - block0[14] = append0_t[3]; - - block0[15] = append1_t[0]; - block1[ 0] = append1_t[1]; - block1[ 1] = append1_t[2]; - block1[ 2] = append1_t[3]; - - block1[ 3] = append2_t[0]; - block1[ 4] = append2_t[1]; - block1[ 5] = append2_t[2]; - block1[ 6] = append2_t[3]; - - block1[ 7] = append3_t[0]; - block1[ 8] = append3_t[1]; - block1[ 9] = append3_t[2]; - block1[10] = append3_t[3]; - - block1[11] = append4_t[0]; - block1[12] = append4_t[1]; - block1[13] = append4_t[2]; - block1[14] = append4_t[3]; - break; - - case 12: block0[12] |= append0_t[0]; - block0[13] = append0_t[1]; - block0[14] = append0_t[2]; - block0[15] = append0_t[3]; - - block1[ 0] = append1_t[0]; - block1[ 1] = append1_t[1]; - block1[ 2] = append1_t[2]; - block1[ 3] = append1_t[3]; - - block1[ 4] = append2_t[0]; - block1[ 5] = append2_t[1]; - block1[ 6] = append2_t[2]; - block1[ 7] = append2_t[3]; - - block1[ 8] = append3_t[0]; - block1[ 9] = append3_t[1]; - block1[10] = append3_t[2]; - block1[11] = append3_t[3]; - - block1[12] = 
append4_t[0]; - block1[13] = append4_t[1]; - block1[14] = append4_t[2]; - block1[15] = append4_t[3]; - break; - - case 13: block0[13] |= append0_t[0]; - block0[14] = append0_t[1]; - block0[15] = append0_t[2]; - block1[ 0] = append0_t[3]; - - block1[ 1] = append1_t[0]; - block1[ 2] = append1_t[1]; - block1[ 3] = append1_t[2]; - block1[ 4] = append1_t[3]; - - block1[ 5] = append2_t[0]; - block1[ 6] = append2_t[1]; - block1[ 7] = append2_t[2]; - block1[ 8] = append2_t[3]; - - block1[ 9] = append3_t[0]; - block1[10] = append3_t[1]; - block1[11] = append3_t[2]; - block1[12] = append3_t[3]; - - block1[13] = append4_t[0]; - block1[14] = append4_t[1]; - block1[15] = append4_t[2]; - break; - - case 14: block0[14] |= append0_t[0]; - block0[15] = append0_t[1]; - block1[ 0] = append0_t[2]; - block1[ 1] = append0_t[3]; - - block1[ 2] = append1_t[0]; - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - - block1[ 6] = append2_t[0]; - block1[ 7] = append2_t[1]; - block1[ 8] = append2_t[2]; - block1[ 9] = append2_t[3]; - - block1[10] = append3_t[0]; - block1[11] = append3_t[1]; - block1[12] = append3_t[2]; - block1[13] = append3_t[3]; - - block1[14] = append4_t[0]; - block1[15] = append4_t[1]; - break; - - case 15: block0[15] |= append0_t[0]; - block1[ 0] = append0_t[1]; - block1[ 1] = append0_t[2]; - block1[ 2] = append0_t[3]; - - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - block1[ 6] = append1_t[0]; - - block1[ 7] = append2_t[0]; - block1[ 8] = append2_t[1]; - block1[ 9] = append2_t[2]; - block1[10] = append2_t[3]; - - block1[11] = append3_t[0]; - block1[12] = append3_t[1]; - block1[13] = append3_t[2]; - block1[14] = append3_t[3]; - - block1[15] = append4_t[0]; - break; - - case 16: block1[ 0] |= append0_t[0]; - block1[ 1] = append0_t[1]; - block1[ 2] = append0_t[2]; - block1[ 3] = append0_t[3]; - - block1[ 4] = append1_t[0]; - block1[ 5] = append1_t[1]; - block1[ 6] = append1_t[2]; - block1[ 7] = 
append1_t[3]; - - block1[ 8] = append2_t[0]; - block1[ 9] = append2_t[1]; - block1[10] = append2_t[2]; - block1[11] = append2_t[3]; - - block1[12] = append3_t[0]; - block1[13] = append3_t[1]; - block1[14] = append3_t[2]; - block1[15] = append3_t[3]; - break; - - case 17: block1[ 1] |= append0_t[0]; - block1[ 2] = append0_t[1]; - block1[ 3] = append0_t[2]; - block1[ 4] = append0_t[3]; - - block1[ 5] = append1_t[0]; - block1[ 6] = append1_t[1]; - block1[ 7] = append1_t[2]; - block1[ 8] = append1_t[3]; - - block1[ 9] = append2_t[0]; - block1[10] = append2_t[1]; - block1[11] = append2_t[2]; - block1[12] = append2_t[3]; - - block1[13] = append3_t[0]; - block1[14] = append3_t[1]; - block1[15] = append3_t[2]; - break; - - case 18: block1[ 2] |= append0_t[0]; - block1[ 3] = append0_t[1]; - block1[ 4] = append0_t[2]; - block1[ 5] = append0_t[3]; - - block1[ 6] = append1_t[0]; - block1[ 7] = append1_t[1]; - block1[ 8] = append1_t[2]; - block1[ 9] = append1_t[3]; - - block1[10] = append2_t[0]; - block1[11] = append2_t[1]; - block1[12] = append2_t[2]; - block1[13] = append2_t[3]; - - block1[14] = append3_t[0]; - block1[15] = append3_t[1]; - break; - - case 19: block1[ 3] |= append0_t[0]; - block1[ 4] = append0_t[1]; - block1[ 5] = append0_t[2]; - block1[ 6] = append0_t[3]; - - block1[ 7] = append1_t[0]; - block1[ 8] = append1_t[1]; - block1[ 9] = append1_t[2]; - block1[10] = append1_t[3]; - - block1[11] = append2_t[0]; - block1[12] = append2_t[1]; - block1[13] = append2_t[2]; - block1[14] = append2_t[3]; - - block1[15] = append3_t[0]; - break; - - case 20: block1[ 4] |= append0_t[0]; - block1[ 5] = append0_t[1]; - block1[ 6] = append0_t[2]; - block1[ 7] = append0_t[3]; - - block1[ 8] = append1_t[0]; - block1[ 9] = append1_t[1]; - block1[10] = append1_t[2]; - block1[11] = append1_t[3]; - - block1[12] = append2_t[0]; - block1[13] = append2_t[1]; - block1[14] = append2_t[2]; - block1[15] = append2_t[3]; - break; - - case 21: block1[ 5] |= append0_t[0]; - block1[ 6] = 
append0_t[1]; - block1[ 7] = append0_t[2]; - block1[ 8] = append0_t[3]; - - block1[ 9] = append1_t[0]; - block1[10] = append1_t[1]; - block1[11] = append1_t[2]; - block1[12] = append1_t[3]; - - block1[13] = append2_t[0]; - block1[14] = append2_t[1]; - block1[15] = append2_t[2]; - break; - - case 22: block1[ 6] |= append0_t[0]; - block1[ 7] = append0_t[1]; - block1[ 8] = append0_t[2]; - block1[ 9] = append0_t[3]; - - block1[10] = append1_t[0]; - block1[11] = append1_t[1]; - block1[12] = append1_t[2]; - block1[13] = append1_t[3]; - - block1[14] = append2_t[0]; - block1[15] = append2_t[1]; - break; - - case 23: block1[ 7] |= append0_t[0]; - block1[ 8] = append0_t[1]; - block1[ 9] = append0_t[2]; - block1[10] = append0_t[3]; - - block1[11] = append1_t[0]; - block1[12] = append1_t[1]; - block1[13] = append1_t[2]; - block1[14] = append1_t[3]; - - block1[15] = append2_t[0]; - break; - - case 24: block1[ 8] |= append0_t[0]; - block1[ 9] = append0_t[1]; - block1[10] = append0_t[2]; - block1[11] = append0_t[3]; - - block1[12] = append1_t[0]; - block1[13] = append1_t[1]; - block1[14] = append1_t[2]; - block1[15] = append1_t[3]; - break; - - case 25: block1[ 9] |= append0_t[0]; - block1[10] = append0_t[1]; - block1[11] = append0_t[2]; - block1[12] = append0_t[3]; - - block1[13] = append1_t[0]; - block1[14] = append1_t[1]; - block1[15] = append1_t[2]; - break; - - case 26: block1[10] |= append0_t[0]; - block1[11] = append0_t[1]; - block1[12] = append0_t[2]; - block1[13] = append0_t[3]; - - block1[14] = append1_t[0]; - block1[15] = append1_t[1]; - break; - - case 27: block1[11] |= append0_t[0]; - block1[12] = append0_t[1]; - block1[13] = append0_t[2]; - block1[14] = append0_t[3]; - - block1[15] = append1_t[0]; - break; - - case 28: block1[12] |= append0_t[0]; - block1[13] = append0_t[1]; - block1[14] = append0_t[2]; - block1[15] = append0_t[3]; - break; - - case 29: block1[13] |= append0_t[0]; - block1[14] = append0_t[1]; - block1[15] = append0_t[2]; - break; - - case 30: 
block1[14] |= append0_t[0]; - block1[15] = append0_t[1]; - break; - } - - u32 new_len = block_len + append_len; - - return new_len; -} - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - u32 salt_buf0[16]; - - 
salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] 
= esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - 
esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - const u32 remaining_bytes = digest_esalt_len + 1 - 64; // substract previous block - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - const u32 pw_salt_len = salt_len + out_len; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - u32 block_len = 0; - - block_len = memcat32 (block0, 
block1, salt_len, w0, w1, w2, w3, out_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - if (block_len < 56) - { - w3_t[2] = pw_salt_len * 8; - } - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], 
MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - if (block_len > 55) - { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - 
MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, 
c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - } - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], 
MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - 
MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = 
esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - // it is the final block when no more than 55 bytes left - - if (remaining_bytes < 56) - { - // it is the last block ! - - w3_t[2] = digest_esalt_len * 8; - } - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, 
d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], 
MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // sometimes (not rare at all) we need a third block :( - - if (remaining_bytes > 55) - { - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, 
d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - } - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a 
bug, we need to get it from the esalt - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - 
esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - 
esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - const u32 remaining_bytes = digest_esalt_len + 1 - 64; // substract previous block - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - const u32 pw_salt_len = salt_len + out_len; - - /* - * HA1 = md5 ($salt . 
$pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - u32 block_len = 0; - - block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, out_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - if (block_len < 56) - { - w3_t[2] = pw_salt_len * 8; - } - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], 
MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - 
MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - if (block_len > 55) - { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = 
block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, 
a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - } - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - // it is the final block when no more than 55 bytes left - - if (remaining_bytes < 56) - { - // it is the last block ! 
- - w3_t[2] = digest_esalt_len * 8; - } - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, 
w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // sometimes (not rare at all) we need a third block :( - - if (remaining_bytes > 55) - { - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - 
w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], 
MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - } - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} diff --git a/nv/m11400_a1.cu b/nv/m11400_a1.cu deleted file mode 100644 index e8a7d31..0000000 --- a/nv/m11400_a1.cu +++ /dev/null @@ -1,2428 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) -{ - const u32 mod = block_len & 3; - const u32 div = block_len / 4; - - const int offset_minus_4 = 4 - mod; - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - u32x append0_t[4]; - - append0_t[0] = __byte_perm ( 0, append0[0], selector); - append0_t[1] = __byte_perm (append0[0], append0[1], selector); - append0_t[2] = __byte_perm (append0[1], append0[2], selector); - append0_t[3] = __byte_perm (append0[2], append0[3], selector); - - u32x append1_t[4]; - - append1_t[0] = 
__byte_perm (append0[3], append1[0], selector); - append1_t[1] = __byte_perm (append1[0], append1[1], selector); - append1_t[2] = __byte_perm (append1[1], append1[2], selector); - append1_t[3] = __byte_perm (append1[2], append1[3], selector); - - u32x append2_t[4]; - - append2_t[0] = __byte_perm (append1[3], append2[0], selector); - append2_t[1] = __byte_perm (append2[0], append2[1], selector); - append2_t[2] = __byte_perm (append2[1], append2[2], selector); - append2_t[3] = __byte_perm (append2[2], append2[3], selector); - - u32x append3_t[4]; - - append3_t[0] = __byte_perm (append2[3], append3[0], selector); - append3_t[1] = __byte_perm (append3[0], append3[1], selector); - append3_t[2] = __byte_perm (append3[1], append3[2], selector); - append3_t[3] = __byte_perm (append3[2], append3[3], selector); - - u32x append4_t[4]; - - append4_t[0] = __byte_perm (append3[3], 0, selector); - append4_t[1] = 0; - append4_t[2] = 0; - append4_t[3] = 0; - - switch (div) - { - case 0: block0[ 0] |= append0_t[0]; - block0[ 1] = append0_t[1]; - block0[ 2] = append0_t[2]; - block0[ 3] = append0_t[3]; - - block0[ 4] = append1_t[0]; - block0[ 5] = append1_t[1]; - block0[ 6] = append1_t[2]; - block0[ 7] = append1_t[3]; - - block0[ 8] = append2_t[0]; - block0[ 9] = append2_t[1]; - block0[10] = append2_t[2]; - block0[11] = append2_t[3]; - - block0[12] = append3_t[0]; - block0[13] = append3_t[1]; - block0[14] = append3_t[2]; - block0[15] = append3_t[3]; - - block1[ 0] = append4_t[0]; - block1[ 1] = append4_t[1]; - block1[ 2] = append4_t[2]; - block1[ 3] = append4_t[3]; - break; - - case 1: block0[ 1] |= append0_t[0]; - block0[ 2] = append0_t[1]; - block0[ 3] = append0_t[2]; - block0[ 4] = append0_t[3]; - - block0[ 5] = append1_t[0]; - block0[ 6] = append1_t[1]; - block0[ 7] = append1_t[2]; - block0[ 8] = append1_t[3]; - - block0[ 9] = append2_t[0]; - block0[10] = append2_t[1]; - block0[11] = append2_t[2]; - block0[12] = append2_t[3]; - - block0[13] = append3_t[0]; - block0[14] = 
append3_t[1]; - block0[15] = append3_t[2]; - block1[ 0] = append3_t[3]; - - block1[ 1] = append4_t[0]; - block1[ 2] = append4_t[1]; - block1[ 3] = append4_t[2]; - block1[ 4] = append4_t[3]; - break; - - case 2: block0[ 2] |= append0_t[0]; - block0[ 3] = append0_t[1]; - block0[ 4] = append0_t[2]; - block0[ 5] = append0_t[3]; - - block0[ 6] = append1_t[0]; - block0[ 7] = append1_t[1]; - block0[ 8] = append1_t[2]; - block0[ 9] = append1_t[3]; - - block0[10] = append2_t[0]; - block0[11] = append2_t[1]; - block0[12] = append2_t[2]; - block0[13] = append2_t[3]; - - block0[14] = append3_t[0]; - block0[15] = append3_t[1]; - block1[ 0] = append3_t[2]; - block1[ 1] = append3_t[3]; - - block1[ 2] = append4_t[0]; - block1[ 3] = append4_t[1]; - block1[ 4] = append4_t[2]; - block1[ 5] = append4_t[3]; - break; - - case 3: block0[ 3] |= append0_t[0]; - block0[ 4] = append0_t[1]; - block0[ 5] = append0_t[2]; - block0[ 6] = append0_t[3]; - - block0[ 7] = append1_t[0]; - block0[ 8] = append1_t[1]; - block0[ 9] = append1_t[2]; - block0[10] = append1_t[3]; - - block0[11] = append2_t[0]; - block0[12] = append2_t[1]; - block0[13] = append2_t[2]; - block0[14] = append2_t[3]; - - block0[15] = append3_t[0]; - block1[ 0] = append3_t[1]; - block1[ 1] = append3_t[2]; - block1[ 2] = append3_t[3]; - - block1[ 3] = append4_t[0]; - block1[ 4] = append4_t[1]; - block1[ 5] = append4_t[2]; - block1[ 6] = append4_t[3]; - break; - - case 4: block0[ 4] |= append0_t[0]; - block0[ 5] = append0_t[1]; - block0[ 6] = append0_t[2]; - block0[ 7] = append0_t[3]; - - block0[ 8] = append1_t[0]; - block0[ 9] = append1_t[1]; - block0[10] = append1_t[2]; - block0[11] = append1_t[3]; - - block0[12] = append2_t[0]; - block0[13] = append2_t[1]; - block0[14] = append2_t[2]; - block0[15] = append2_t[3]; - - block1[ 0] = append3_t[0]; - block1[ 1] = append3_t[1]; - block1[ 2] = append3_t[2]; - block1[ 3] = append3_t[3]; - - block1[ 4] = append4_t[0]; - block1[ 5] = append4_t[1]; - block1[ 6] = append4_t[2]; - block1[ 7] = 
append4_t[3]; - break; - - case 5: block0[ 5] |= append0_t[0]; - block0[ 6] = append0_t[1]; - block0[ 7] = append0_t[2]; - block0[ 8] = append0_t[3]; - - block0[ 9] = append1_t[0]; - block0[10] = append1_t[1]; - block0[11] = append1_t[2]; - block0[12] = append1_t[3]; - - block0[13] = append2_t[0]; - block0[14] = append2_t[1]; - block0[15] = append2_t[2]; - block1[ 0] = append2_t[3]; - - block1[ 1] = append3_t[0]; - block1[ 2] = append3_t[1]; - block1[ 3] = append3_t[2]; - block1[ 4] = append3_t[3]; - - block1[ 5] = append4_t[0]; - block1[ 6] = append4_t[1]; - block1[ 7] = append4_t[2]; - block1[ 8] = append4_t[3]; - break; - - case 6: block0[ 6] |= append0_t[0]; - block0[ 7] = append0_t[1]; - block0[ 8] = append0_t[2]; - block0[ 9] = append0_t[3]; - - block0[10] = append1_t[0]; - block0[11] = append1_t[1]; - block0[12] = append1_t[2]; - block0[13] = append1_t[3]; - - block0[14] = append2_t[0]; - block0[15] = append2_t[1]; - block1[ 0] = append2_t[2]; - block1[ 1] = append2_t[3]; - - block1[ 2] = append3_t[0]; - block1[ 3] = append3_t[1]; - block1[ 4] = append3_t[2]; - block1[ 5] = append3_t[3]; - - block1[ 6] = append4_t[0]; - block1[ 7] = append4_t[1]; - block1[ 8] = append4_t[2]; - block1[ 9] = append4_t[3]; - break; - - case 7: block0[ 7] |= append0_t[0]; - block0[ 8] = append0_t[1]; - block0[ 9] = append0_t[2]; - block0[10] = append0_t[3]; - - block0[11] = append1_t[0]; - block0[12] = append1_t[1]; - block0[13] = append1_t[2]; - block0[14] = append1_t[3]; - - block0[15] = append2_t[0]; - block1[ 0] = append2_t[1]; - block1[ 1] = append2_t[2]; - block1[ 2] = append2_t[3]; - - block1[ 3] = append3_t[0]; - block1[ 4] = append3_t[1]; - block1[ 5] = append3_t[2]; - block1[ 6] = append3_t[3]; - - block1[ 7] = append4_t[0]; - block1[ 8] = append4_t[1]; - block1[ 9] = append4_t[2]; - block1[10] = append4_t[3]; - break; - - case 8: block0[ 8] |= append0_t[0]; - block0[ 9] = append0_t[1]; - block0[10] = append0_t[2]; - block0[11] = append0_t[3]; - - block0[12] = 
append1_t[0]; - block0[13] = append1_t[1]; - block0[14] = append1_t[2]; - block0[15] = append1_t[3]; - - block1[ 0] = append2_t[0]; - block1[ 1] = append2_t[1]; - block1[ 2] = append2_t[2]; - block1[ 3] = append2_t[3]; - - block1[ 4] = append3_t[0]; - block1[ 5] = append3_t[1]; - block1[ 6] = append3_t[2]; - block1[ 7] = append3_t[3]; - - block1[ 8] = append4_t[0]; - block1[ 9] = append4_t[1]; - block1[10] = append4_t[2]; - block1[11] = append4_t[3]; - break; - - case 9: block0[ 9] |= append0_t[0]; - block0[10] = append0_t[1]; - block0[11] = append0_t[2]; - block0[12] = append0_t[3]; - - block0[13] = append1_t[0]; - block0[14] = append1_t[1]; - block0[15] = append1_t[2]; - block1[ 0] = append1_t[3]; - - block1[ 1] = append2_t[0]; - block1[ 2] = append2_t[1]; - block1[ 3] = append2_t[2]; - block1[ 4] = append2_t[3]; - - block1[ 5] = append3_t[0]; - block1[ 6] = append3_t[1]; - block1[ 7] = append3_t[2]; - block1[ 8] = append3_t[3]; - - block1[ 9] = append4_t[0]; - block1[10] = append4_t[1]; - block1[11] = append4_t[2]; - block1[12] = append4_t[3]; - break; - - case 10: block0[10] |= append0_t[0]; - block0[11] = append0_t[1]; - block0[12] = append0_t[2]; - block0[13] = append0_t[3]; - - block0[14] = append1_t[0]; - block0[15] = append1_t[1]; - block1[ 0] = append1_t[2]; - block1[ 1] = append1_t[3]; - - block1[ 2] = append2_t[0]; - block1[ 3] = append2_t[1]; - block1[ 4] = append2_t[2]; - block1[ 5] = append2_t[3]; - - block1[ 6] = append3_t[0]; - block1[ 7] = append3_t[1]; - block1[ 8] = append3_t[2]; - block1[ 9] = append3_t[3]; - - block1[10] = append4_t[0]; - block1[11] = append4_t[1]; - block1[12] = append4_t[2]; - block1[13] = append4_t[3]; - break; - - case 11: block0[11] |= append0_t[0]; - block0[12] = append0_t[1]; - block0[13] = append0_t[2]; - block0[14] = append0_t[3]; - - block0[15] = append1_t[0]; - block1[ 0] = append1_t[1]; - block1[ 1] = append1_t[2]; - block1[ 2] = append1_t[3]; - - block1[ 3] = append2_t[0]; - block1[ 4] = append2_t[1]; - block1[ 5] 
= append2_t[2]; - block1[ 6] = append2_t[3]; - - block1[ 7] = append3_t[0]; - block1[ 8] = append3_t[1]; - block1[ 9] = append3_t[2]; - block1[10] = append3_t[3]; - - block1[11] = append4_t[0]; - block1[12] = append4_t[1]; - block1[13] = append4_t[2]; - block1[14] = append4_t[3]; - break; - - case 12: block0[12] |= append0_t[0]; - block0[13] = append0_t[1]; - block0[14] = append0_t[2]; - block0[15] = append0_t[3]; - - block1[ 0] = append1_t[0]; - block1[ 1] = append1_t[1]; - block1[ 2] = append1_t[2]; - block1[ 3] = append1_t[3]; - - block1[ 4] = append2_t[0]; - block1[ 5] = append2_t[1]; - block1[ 6] = append2_t[2]; - block1[ 7] = append2_t[3]; - - block1[ 8] = append3_t[0]; - block1[ 9] = append3_t[1]; - block1[10] = append3_t[2]; - block1[11] = append3_t[3]; - - block1[12] = append4_t[0]; - block1[13] = append4_t[1]; - block1[14] = append4_t[2]; - block1[15] = append4_t[3]; - break; - - case 13: block0[13] |= append0_t[0]; - block0[14] = append0_t[1]; - block0[15] = append0_t[2]; - block1[ 0] = append0_t[3]; - - block1[ 1] = append1_t[0]; - block1[ 2] = append1_t[1]; - block1[ 3] = append1_t[2]; - block1[ 4] = append1_t[3]; - - block1[ 5] = append2_t[0]; - block1[ 6] = append2_t[1]; - block1[ 7] = append2_t[2]; - block1[ 8] = append2_t[3]; - - block1[ 9] = append3_t[0]; - block1[10] = append3_t[1]; - block1[11] = append3_t[2]; - block1[12] = append3_t[3]; - - block1[13] = append4_t[0]; - block1[14] = append4_t[1]; - block1[15] = append4_t[2]; - break; - - case 14: block0[14] |= append0_t[0]; - block0[15] = append0_t[1]; - block1[ 0] = append0_t[2]; - block1[ 1] = append0_t[3]; - - block1[ 2] = append1_t[0]; - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - - block1[ 6] = append2_t[0]; - block1[ 7] = append2_t[1]; - block1[ 8] = append2_t[2]; - block1[ 9] = append2_t[3]; - - block1[10] = append3_t[0]; - block1[11] = append3_t[1]; - block1[12] = append3_t[2]; - block1[13] = append3_t[3]; - - block1[14] = append4_t[0]; - 
block1[15] = append4_t[1]; - break; - - case 15: block0[15] |= append0_t[0]; - block1[ 0] = append0_t[1]; - block1[ 1] = append0_t[2]; - block1[ 2] = append0_t[3]; - - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - block1[ 6] = append1_t[0]; - - block1[ 7] = append2_t[0]; - block1[ 8] = append2_t[1]; - block1[ 9] = append2_t[2]; - block1[10] = append2_t[3]; - - block1[11] = append3_t[0]; - block1[12] = append3_t[1]; - block1[13] = append3_t[2]; - block1[14] = append3_t[3]; - - block1[15] = append4_t[0]; - break; - - case 16: block1[ 0] |= append0_t[0]; - block1[ 1] = append0_t[1]; - block1[ 2] = append0_t[2]; - block1[ 3] = append0_t[3]; - - block1[ 4] = append1_t[0]; - block1[ 5] = append1_t[1]; - block1[ 6] = append1_t[2]; - block1[ 7] = append1_t[3]; - - block1[ 8] = append2_t[0]; - block1[ 9] = append2_t[1]; - block1[10] = append2_t[2]; - block1[11] = append2_t[3]; - - block1[12] = append3_t[0]; - block1[13] = append3_t[1]; - block1[14] = append3_t[2]; - block1[15] = append3_t[3]; - break; - - case 17: block1[ 1] |= append0_t[0]; - block1[ 2] = append0_t[1]; - block1[ 3] = append0_t[2]; - block1[ 4] = append0_t[3]; - - block1[ 5] = append1_t[0]; - block1[ 6] = append1_t[1]; - block1[ 7] = append1_t[2]; - block1[ 8] = append1_t[3]; - - block1[ 9] = append2_t[0]; - block1[10] = append2_t[1]; - block1[11] = append2_t[2]; - block1[12] = append2_t[3]; - - block1[13] = append3_t[0]; - block1[14] = append3_t[1]; - block1[15] = append3_t[2]; - break; - - case 18: block1[ 2] |= append0_t[0]; - block1[ 3] = append0_t[1]; - block1[ 4] = append0_t[2]; - block1[ 5] = append0_t[3]; - - block1[ 6] = append1_t[0]; - block1[ 7] = append1_t[1]; - block1[ 8] = append1_t[2]; - block1[ 9] = append1_t[3]; - - block1[10] = append2_t[0]; - block1[11] = append2_t[1]; - block1[12] = append2_t[2]; - block1[13] = append2_t[3]; - - block1[14] = append3_t[0]; - block1[15] = append3_t[1]; - break; - - case 19: block1[ 3] |= append0_t[0]; - block1[ 4] = 
append0_t[1]; - block1[ 5] = append0_t[2]; - block1[ 6] = append0_t[3]; - - block1[ 7] = append1_t[0]; - block1[ 8] = append1_t[1]; - block1[ 9] = append1_t[2]; - block1[10] = append1_t[3]; - - block1[11] = append2_t[0]; - block1[12] = append2_t[1]; - block1[13] = append2_t[2]; - block1[14] = append2_t[3]; - - block1[15] = append3_t[0]; - break; - - case 20: block1[ 4] |= append0_t[0]; - block1[ 5] = append0_t[1]; - block1[ 6] = append0_t[2]; - block1[ 7] = append0_t[3]; - - block1[ 8] = append1_t[0]; - block1[ 9] = append1_t[1]; - block1[10] = append1_t[2]; - block1[11] = append1_t[3]; - - block1[12] = append2_t[0]; - block1[13] = append2_t[1]; - block1[14] = append2_t[2]; - block1[15] = append2_t[3]; - break; - - case 21: block1[ 5] |= append0_t[0]; - block1[ 6] = append0_t[1]; - block1[ 7] = append0_t[2]; - block1[ 8] = append0_t[3]; - - block1[ 9] = append1_t[0]; - block1[10] = append1_t[1]; - block1[11] = append1_t[2]; - block1[12] = append1_t[3]; - - block1[13] = append2_t[0]; - block1[14] = append2_t[1]; - block1[15] = append2_t[2]; - break; - - case 22: block1[ 6] |= append0_t[0]; - block1[ 7] = append0_t[1]; - block1[ 8] = append0_t[2]; - block1[ 9] = append0_t[3]; - - block1[10] = append1_t[0]; - block1[11] = append1_t[1]; - block1[12] = append1_t[2]; - block1[13] = append1_t[3]; - - block1[14] = append2_t[0]; - block1[15] = append2_t[1]; - break; - - case 23: block1[ 7] |= append0_t[0]; - block1[ 8] = append0_t[1]; - block1[ 9] = append0_t[2]; - block1[10] = append0_t[3]; - - block1[11] = append1_t[0]; - block1[12] = append1_t[1]; - block1[13] = append1_t[2]; - block1[14] = append1_t[3]; - - block1[15] = append2_t[0]; - break; - - case 24: block1[ 8] |= append0_t[0]; - block1[ 9] = append0_t[1]; - block1[10] = append0_t[2]; - block1[11] = append0_t[3]; - - block1[12] = append1_t[0]; - block1[13] = append1_t[1]; - block1[14] = append1_t[2]; - block1[15] = append1_t[3]; - break; - - case 25: block1[ 9] |= append0_t[0]; - block1[10] = append0_t[1]; - 
block1[11] = append0_t[2]; - block1[12] = append0_t[3]; - - block1[13] = append1_t[0]; - block1[14] = append1_t[1]; - block1[15] = append1_t[2]; - break; - - case 26: block1[10] |= append0_t[0]; - block1[11] = append0_t[1]; - block1[12] = append0_t[2]; - block1[13] = append0_t[3]; - - block1[14] = append1_t[0]; - block1[15] = append1_t[1]; - break; - - case 27: block1[11] |= append0_t[0]; - block1[12] = append0_t[1]; - block1[13] = append0_t[2]; - block1[14] = append0_t[3]; - - block1[15] = append1_t[0]; - break; - - case 28: block1[12] |= append0_t[0]; - block1[13] = append0_t[1]; - block1[14] = append0_t[2]; - block1[15] = append0_t[3]; - break; - - case 29: block1[13] |= append0_t[0]; - block1[14] = append0_t[1]; - block1[15] = append0_t[2]; - break; - - case 30: block1[14] |= append0_t[0]; - block1[15] = append0_t[1]; - break; - } - - u32 new_len = block_len + append_len; - - return new_len; -} - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - 
const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - 
- salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = 
esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - const u32 remaining_bytes = digest_esalt_len + 1 - 64; // substract previous block - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - 
- u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_salt_len = salt_len + pw_len; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - u32 block_len = 0; - - block_len = memcat32 
(block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - if (block_len < 56) - { - w3_t[2] = pw_salt_len * 8; - } - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, 
w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, 
MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - if (block_len > 55) - { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, 
MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP 
(MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - } - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, 
c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, 
MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = 
esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - // it is the final block when no more than 55 bytes left - - if (remaining_bytes < 56) - { - // it is the last block ! - - w3_t[2] = digest_esalt_len * 8; - } - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, 
d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], 
MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // sometimes (not rare at all) we need a third block :( - - if (remaining_bytes > 55) - { - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, 
d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - } - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const 
u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - 
__syncthreads (); - - if (gid >= gid_max) return; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - 
salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 
esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - const u32 remaining_bytes = digest_esalt_len + 1 - 64; // substract previous block - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | 
wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = wordl3[2] | wordr3[2]; - w3[3] = wordl3[3] | wordr3[3]; - - const u32 pw_salt_len = salt_len + pw_len; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - u32 block_len = 0; - - block_len = memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; 
- w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - if (block_len < 56) - { - w3_t[2] = pw_salt_len * 8; - } - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP 
(MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - 
c += MD5M_C; - d += MD5M_D; - - if (block_len > 55) - { - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, 
w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, 
MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - } - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - 
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, 
a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - // it is the final block when no more than 55 bytes left - - if (remaining_bytes < 56) - { - // it is the last block ! 
- - w3_t[2] = digest_esalt_len * 8; - } - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, 
w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // sometimes (not rare at all) we need a third block :( - - if (remaining_bytes > 55) - { - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - 
w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], 
MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - 
MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - } - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 
gid_max) -{ -} diff --git a/nv/m11400_a3.cu b/nv/m11400_a3.cu deleted file mode 100644 index d17c45b..0000000 --- a/nv/m11400_a3.cu +++ /dev/null @@ -1,6095 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 3 -#define DGST_R2 2 -#define DGST_R3 1 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_lower8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ static u32 memcat32 (u32x block0[16], u32x block1[16], const u32 block_len, const u32x append0[4], const u32x append1[4], const u32x append2[4], const u32x append3[4], const u32 append_len) -{ - const u32 mod = block_len & 3; - const u32 div = block_len / 4; - - const int offset_minus_4 = 4 - mod; - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - u32x append0_t[4]; - - append0_t[0] = __byte_perm ( 0, append0[0], selector); - append0_t[1] = __byte_perm (append0[0], append0[1], selector); - append0_t[2] = __byte_perm (append0[1], append0[2], selector); - append0_t[3] = __byte_perm (append0[2], append0[3], selector); - - u32x append1_t[4]; - - append1_t[0] = 
__byte_perm (append0[3], append1[0], selector); - append1_t[1] = __byte_perm (append1[0], append1[1], selector); - append1_t[2] = __byte_perm (append1[1], append1[2], selector); - append1_t[3] = __byte_perm (append1[2], append1[3], selector); - - u32x append2_t[4]; - - append2_t[0] = __byte_perm (append1[3], append2[0], selector); - append2_t[1] = __byte_perm (append2[0], append2[1], selector); - append2_t[2] = __byte_perm (append2[1], append2[2], selector); - append2_t[3] = __byte_perm (append2[2], append2[3], selector); - - u32x append3_t[4]; - - append3_t[0] = __byte_perm (append2[3], append3[0], selector); - append3_t[1] = __byte_perm (append3[0], append3[1], selector); - append3_t[2] = __byte_perm (append3[1], append3[2], selector); - append3_t[3] = __byte_perm (append3[2], append3[3], selector); - - u32x append4_t[4]; - - append4_t[0] = __byte_perm (append3[3], 0, selector); - append4_t[1] = 0; - append4_t[2] = 0; - append4_t[3] = 0; - - switch (div) - { - case 0: block0[ 0] |= append0_t[0]; - block0[ 1] = append0_t[1]; - block0[ 2] = append0_t[2]; - block0[ 3] = append0_t[3]; - - block0[ 4] = append1_t[0]; - block0[ 5] = append1_t[1]; - block0[ 6] = append1_t[2]; - block0[ 7] = append1_t[3]; - - block0[ 8] = append2_t[0]; - block0[ 9] = append2_t[1]; - block0[10] = append2_t[2]; - block0[11] = append2_t[3]; - - block0[12] = append3_t[0]; - block0[13] = append3_t[1]; - block0[14] = append3_t[2]; - block0[15] = append3_t[3]; - - block1[ 0] = append4_t[0]; - block1[ 1] = append4_t[1]; - block1[ 2] = append4_t[2]; - block1[ 3] = append4_t[3]; - break; - - case 1: block0[ 1] |= append0_t[0]; - block0[ 2] = append0_t[1]; - block0[ 3] = append0_t[2]; - block0[ 4] = append0_t[3]; - - block0[ 5] = append1_t[0]; - block0[ 6] = append1_t[1]; - block0[ 7] = append1_t[2]; - block0[ 8] = append1_t[3]; - - block0[ 9] = append2_t[0]; - block0[10] = append2_t[1]; - block0[11] = append2_t[2]; - block0[12] = append2_t[3]; - - block0[13] = append3_t[0]; - block0[14] = 
append3_t[1]; - block0[15] = append3_t[2]; - block1[ 0] = append3_t[3]; - - block1[ 1] = append4_t[0]; - block1[ 2] = append4_t[1]; - block1[ 3] = append4_t[2]; - block1[ 4] = append4_t[3]; - break; - - case 2: block0[ 2] |= append0_t[0]; - block0[ 3] = append0_t[1]; - block0[ 4] = append0_t[2]; - block0[ 5] = append0_t[3]; - - block0[ 6] = append1_t[0]; - block0[ 7] = append1_t[1]; - block0[ 8] = append1_t[2]; - block0[ 9] = append1_t[3]; - - block0[10] = append2_t[0]; - block0[11] = append2_t[1]; - block0[12] = append2_t[2]; - block0[13] = append2_t[3]; - - block0[14] = append3_t[0]; - block0[15] = append3_t[1]; - block1[ 0] = append3_t[2]; - block1[ 1] = append3_t[3]; - - block1[ 2] = append4_t[0]; - block1[ 3] = append4_t[1]; - block1[ 4] = append4_t[2]; - block1[ 5] = append4_t[3]; - break; - - case 3: block0[ 3] |= append0_t[0]; - block0[ 4] = append0_t[1]; - block0[ 5] = append0_t[2]; - block0[ 6] = append0_t[3]; - - block0[ 7] = append1_t[0]; - block0[ 8] = append1_t[1]; - block0[ 9] = append1_t[2]; - block0[10] = append1_t[3]; - - block0[11] = append2_t[0]; - block0[12] = append2_t[1]; - block0[13] = append2_t[2]; - block0[14] = append2_t[3]; - - block0[15] = append3_t[0]; - block1[ 0] = append3_t[1]; - block1[ 1] = append3_t[2]; - block1[ 2] = append3_t[3]; - - block1[ 3] = append4_t[0]; - block1[ 4] = append4_t[1]; - block1[ 5] = append4_t[2]; - block1[ 6] = append4_t[3]; - break; - - case 4: block0[ 4] |= append0_t[0]; - block0[ 5] = append0_t[1]; - block0[ 6] = append0_t[2]; - block0[ 7] = append0_t[3]; - - block0[ 8] = append1_t[0]; - block0[ 9] = append1_t[1]; - block0[10] = append1_t[2]; - block0[11] = append1_t[3]; - - block0[12] = append2_t[0]; - block0[13] = append2_t[1]; - block0[14] = append2_t[2]; - block0[15] = append2_t[3]; - - block1[ 0] = append3_t[0]; - block1[ 1] = append3_t[1]; - block1[ 2] = append3_t[2]; - block1[ 3] = append3_t[3]; - - block1[ 4] = append4_t[0]; - block1[ 5] = append4_t[1]; - block1[ 6] = append4_t[2]; - block1[ 7] = 
append4_t[3]; - break; - - case 5: block0[ 5] |= append0_t[0]; - block0[ 6] = append0_t[1]; - block0[ 7] = append0_t[2]; - block0[ 8] = append0_t[3]; - - block0[ 9] = append1_t[0]; - block0[10] = append1_t[1]; - block0[11] = append1_t[2]; - block0[12] = append1_t[3]; - - block0[13] = append2_t[0]; - block0[14] = append2_t[1]; - block0[15] = append2_t[2]; - block1[ 0] = append2_t[3]; - - block1[ 1] = append3_t[0]; - block1[ 2] = append3_t[1]; - block1[ 3] = append3_t[2]; - block1[ 4] = append3_t[3]; - - block1[ 5] = append4_t[0]; - block1[ 6] = append4_t[1]; - block1[ 7] = append4_t[2]; - block1[ 8] = append4_t[3]; - break; - - case 6: block0[ 6] |= append0_t[0]; - block0[ 7] = append0_t[1]; - block0[ 8] = append0_t[2]; - block0[ 9] = append0_t[3]; - - block0[10] = append1_t[0]; - block0[11] = append1_t[1]; - block0[12] = append1_t[2]; - block0[13] = append1_t[3]; - - block0[14] = append2_t[0]; - block0[15] = append2_t[1]; - block1[ 0] = append2_t[2]; - block1[ 1] = append2_t[3]; - - block1[ 2] = append3_t[0]; - block1[ 3] = append3_t[1]; - block1[ 4] = append3_t[2]; - block1[ 5] = append3_t[3]; - - block1[ 6] = append4_t[0]; - block1[ 7] = append4_t[1]; - block1[ 8] = append4_t[2]; - block1[ 9] = append4_t[3]; - break; - - case 7: block0[ 7] |= append0_t[0]; - block0[ 8] = append0_t[1]; - block0[ 9] = append0_t[2]; - block0[10] = append0_t[3]; - - block0[11] = append1_t[0]; - block0[12] = append1_t[1]; - block0[13] = append1_t[2]; - block0[14] = append1_t[3]; - - block0[15] = append2_t[0]; - block1[ 0] = append2_t[1]; - block1[ 1] = append2_t[2]; - block1[ 2] = append2_t[3]; - - block1[ 3] = append3_t[0]; - block1[ 4] = append3_t[1]; - block1[ 5] = append3_t[2]; - block1[ 6] = append3_t[3]; - - block1[ 7] = append4_t[0]; - block1[ 8] = append4_t[1]; - block1[ 9] = append4_t[2]; - block1[10] = append4_t[3]; - break; - - case 8: block0[ 8] |= append0_t[0]; - block0[ 9] = append0_t[1]; - block0[10] = append0_t[2]; - block0[11] = append0_t[3]; - - block0[12] = 
append1_t[0]; - block0[13] = append1_t[1]; - block0[14] = append1_t[2]; - block0[15] = append1_t[3]; - - block1[ 0] = append2_t[0]; - block1[ 1] = append2_t[1]; - block1[ 2] = append2_t[2]; - block1[ 3] = append2_t[3]; - - block1[ 4] = append3_t[0]; - block1[ 5] = append3_t[1]; - block1[ 6] = append3_t[2]; - block1[ 7] = append3_t[3]; - - block1[ 8] = append4_t[0]; - block1[ 9] = append4_t[1]; - block1[10] = append4_t[2]; - block1[11] = append4_t[3]; - break; - - case 9: block0[ 9] |= append0_t[0]; - block0[10] = append0_t[1]; - block0[11] = append0_t[2]; - block0[12] = append0_t[3]; - - block0[13] = append1_t[0]; - block0[14] = append1_t[1]; - block0[15] = append1_t[2]; - block1[ 0] = append1_t[3]; - - block1[ 1] = append2_t[0]; - block1[ 2] = append2_t[1]; - block1[ 3] = append2_t[2]; - block1[ 4] = append2_t[3]; - - block1[ 5] = append3_t[0]; - block1[ 6] = append3_t[1]; - block1[ 7] = append3_t[2]; - block1[ 8] = append3_t[3]; - - block1[ 9] = append4_t[0]; - block1[10] = append4_t[1]; - block1[11] = append4_t[2]; - block1[12] = append4_t[3]; - break; - - case 10: block0[10] |= append0_t[0]; - block0[11] = append0_t[1]; - block0[12] = append0_t[2]; - block0[13] = append0_t[3]; - - block0[14] = append1_t[0]; - block0[15] = append1_t[1]; - block1[ 0] = append1_t[2]; - block1[ 1] = append1_t[3]; - - block1[ 2] = append2_t[0]; - block1[ 3] = append2_t[1]; - block1[ 4] = append2_t[2]; - block1[ 5] = append2_t[3]; - - block1[ 6] = append3_t[0]; - block1[ 7] = append3_t[1]; - block1[ 8] = append3_t[2]; - block1[ 9] = append3_t[3]; - - block1[10] = append4_t[0]; - block1[11] = append4_t[1]; - block1[12] = append4_t[2]; - block1[13] = append4_t[3]; - break; - - case 11: block0[11] |= append0_t[0]; - block0[12] = append0_t[1]; - block0[13] = append0_t[2]; - block0[14] = append0_t[3]; - - block0[15] = append1_t[0]; - block1[ 0] = append1_t[1]; - block1[ 1] = append1_t[2]; - block1[ 2] = append1_t[3]; - - block1[ 3] = append2_t[0]; - block1[ 4] = append2_t[1]; - block1[ 5] 
= append2_t[2]; - block1[ 6] = append2_t[3]; - - block1[ 7] = append3_t[0]; - block1[ 8] = append3_t[1]; - block1[ 9] = append3_t[2]; - block1[10] = append3_t[3]; - - block1[11] = append4_t[0]; - block1[12] = append4_t[1]; - block1[13] = append4_t[2]; - block1[14] = append4_t[3]; - break; - - case 12: block0[12] |= append0_t[0]; - block0[13] = append0_t[1]; - block0[14] = append0_t[2]; - block0[15] = append0_t[3]; - - block1[ 0] = append1_t[0]; - block1[ 1] = append1_t[1]; - block1[ 2] = append1_t[2]; - block1[ 3] = append1_t[3]; - - block1[ 4] = append2_t[0]; - block1[ 5] = append2_t[1]; - block1[ 6] = append2_t[2]; - block1[ 7] = append2_t[3]; - - block1[ 8] = append3_t[0]; - block1[ 9] = append3_t[1]; - block1[10] = append3_t[2]; - block1[11] = append3_t[3]; - - block1[12] = append4_t[0]; - block1[13] = append4_t[1]; - block1[14] = append4_t[2]; - block1[15] = append4_t[3]; - break; - - case 13: block0[13] |= append0_t[0]; - block0[14] = append0_t[1]; - block0[15] = append0_t[2]; - block1[ 0] = append0_t[3]; - - block1[ 1] = append1_t[0]; - block1[ 2] = append1_t[1]; - block1[ 3] = append1_t[2]; - block1[ 4] = append1_t[3]; - - block1[ 5] = append2_t[0]; - block1[ 6] = append2_t[1]; - block1[ 7] = append2_t[2]; - block1[ 8] = append2_t[3]; - - block1[ 9] = append3_t[0]; - block1[10] = append3_t[1]; - block1[11] = append3_t[2]; - block1[12] = append3_t[3]; - - block1[13] = append4_t[0]; - block1[14] = append4_t[1]; - block1[15] = append4_t[2]; - break; - - case 14: block0[14] |= append0_t[0]; - block0[15] = append0_t[1]; - block1[ 0] = append0_t[2]; - block1[ 1] = append0_t[3]; - - block1[ 2] = append1_t[0]; - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - - block1[ 6] = append2_t[0]; - block1[ 7] = append2_t[1]; - block1[ 8] = append2_t[2]; - block1[ 9] = append2_t[3]; - - block1[10] = append3_t[0]; - block1[11] = append3_t[1]; - block1[12] = append3_t[2]; - block1[13] = append3_t[3]; - - block1[14] = append4_t[0]; - 
block1[15] = append4_t[1]; - break; - - case 15: block0[15] |= append0_t[0]; - block1[ 0] = append0_t[1]; - block1[ 1] = append0_t[2]; - block1[ 2] = append0_t[3]; - - block1[ 3] = append1_t[1]; - block1[ 4] = append1_t[2]; - block1[ 5] = append1_t[3]; - block1[ 6] = append1_t[0]; - - block1[ 7] = append2_t[0]; - block1[ 8] = append2_t[1]; - block1[ 9] = append2_t[2]; - block1[10] = append2_t[3]; - - block1[11] = append3_t[0]; - block1[12] = append3_t[1]; - block1[13] = append3_t[2]; - block1[14] = append3_t[3]; - - block1[15] = append4_t[0]; - break; - - case 16: block1[ 0] |= append0_t[0]; - block1[ 1] = append0_t[1]; - block1[ 2] = append0_t[2]; - block1[ 3] = append0_t[3]; - - block1[ 4] = append1_t[0]; - block1[ 5] = append1_t[1]; - block1[ 6] = append1_t[2]; - block1[ 7] = append1_t[3]; - - block1[ 8] = append2_t[0]; - block1[ 9] = append2_t[1]; - block1[10] = append2_t[2]; - block1[11] = append2_t[3]; - - block1[12] = append3_t[0]; - block1[13] = append3_t[1]; - block1[14] = append3_t[2]; - block1[15] = append3_t[3]; - break; - - case 17: block1[ 1] |= append0_t[0]; - block1[ 2] = append0_t[1]; - block1[ 3] = append0_t[2]; - block1[ 4] = append0_t[3]; - - block1[ 5] = append1_t[0]; - block1[ 6] = append1_t[1]; - block1[ 7] = append1_t[2]; - block1[ 8] = append1_t[3]; - - block1[ 9] = append2_t[0]; - block1[10] = append2_t[1]; - block1[11] = append2_t[2]; - block1[12] = append2_t[3]; - - block1[13] = append3_t[0]; - block1[14] = append3_t[1]; - block1[15] = append3_t[2]; - break; - - case 18: block1[ 2] |= append0_t[0]; - block1[ 3] = append0_t[1]; - block1[ 4] = append0_t[2]; - block1[ 5] = append0_t[3]; - - block1[ 6] = append1_t[0]; - block1[ 7] = append1_t[1]; - block1[ 8] = append1_t[2]; - block1[ 9] = append1_t[3]; - - block1[10] = append2_t[0]; - block1[11] = append2_t[1]; - block1[12] = append2_t[2]; - block1[13] = append2_t[3]; - - block1[14] = append3_t[0]; - block1[15] = append3_t[1]; - break; - - case 19: block1[ 3] |= append0_t[0]; - block1[ 4] = 
append0_t[1]; - block1[ 5] = append0_t[2]; - block1[ 6] = append0_t[3]; - - block1[ 7] = append1_t[0]; - block1[ 8] = append1_t[1]; - block1[ 9] = append1_t[2]; - block1[10] = append1_t[3]; - - block1[11] = append2_t[0]; - block1[12] = append2_t[1]; - block1[13] = append2_t[2]; - block1[14] = append2_t[3]; - - block1[15] = append3_t[0]; - break; - - case 20: block1[ 4] |= append0_t[0]; - block1[ 5] = append0_t[1]; - block1[ 6] = append0_t[2]; - block1[ 7] = append0_t[3]; - - block1[ 8] = append1_t[0]; - block1[ 9] = append1_t[1]; - block1[10] = append1_t[2]; - block1[11] = append1_t[3]; - - block1[12] = append2_t[0]; - block1[13] = append2_t[1]; - block1[14] = append2_t[2]; - block1[15] = append2_t[3]; - break; - - case 21: block1[ 5] |= append0_t[0]; - block1[ 6] = append0_t[1]; - block1[ 7] = append0_t[2]; - block1[ 8] = append0_t[3]; - - block1[ 9] = append1_t[0]; - block1[10] = append1_t[1]; - block1[11] = append1_t[2]; - block1[12] = append1_t[3]; - - block1[13] = append2_t[0]; - block1[14] = append2_t[1]; - block1[15] = append2_t[2]; - break; - - case 22: block1[ 6] |= append0_t[0]; - block1[ 7] = append0_t[1]; - block1[ 8] = append0_t[2]; - block1[ 9] = append0_t[3]; - - block1[10] = append1_t[0]; - block1[11] = append1_t[1]; - block1[12] = append1_t[2]; - block1[13] = append1_t[3]; - - block1[14] = append2_t[0]; - block1[15] = append2_t[1]; - break; - - case 23: block1[ 7] |= append0_t[0]; - block1[ 8] = append0_t[1]; - block1[ 9] = append0_t[2]; - block1[10] = append0_t[3]; - - block1[11] = append1_t[0]; - block1[12] = append1_t[1]; - block1[13] = append1_t[2]; - block1[14] = append1_t[3]; - - block1[15] = append2_t[0]; - break; - - case 24: block1[ 8] |= append0_t[0]; - block1[ 9] = append0_t[1]; - block1[10] = append0_t[2]; - block1[11] = append0_t[3]; - - block1[12] = append1_t[0]; - block1[13] = append1_t[1]; - block1[14] = append1_t[2]; - block1[15] = append1_t[3]; - break; - - case 25: block1[ 9] |= append0_t[0]; - block1[10] = append0_t[1]; - 
block1[11] = append0_t[2]; - block1[12] = append0_t[3]; - - block1[13] = append1_t[0]; - block1[14] = append1_t[1]; - block1[15] = append1_t[2]; - break; - - case 26: block1[10] |= append0_t[0]; - block1[11] = append0_t[1]; - block1[12] = append0_t[2]; - block1[13] = append0_t[3]; - - block1[14] = append1_t[0]; - block1[15] = append1_t[1]; - break; - - case 27: block1[11] |= append0_t[0]; - block1[12] = append0_t[1]; - block1[13] = append0_t[2]; - block1[14] = append0_t[3]; - - block1[15] = append1_t[0]; - break; - - case 28: block1[12] |= append0_t[0]; - block1[13] = append0_t[1]; - block1[14] = append0_t[2]; - block1[15] = append0_t[3]; - break; - - case 29: block1[13] |= append0_t[0]; - block1[14] = append0_t[1]; - block1[15] = append0_t[2]; - break; - - case 30: block1[14] |= append0_t[0]; - block1[15] = append0_t[1]; - break; - } - - u32 new_len = block_len + append_len; - - return new_len; -} - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m11400m_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid 
= (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - 
salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] 
= esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - 
u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - 
MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - 
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, 
a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400m_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = 
esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = 
esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . 
$pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, 
MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - 
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, 
a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], 
MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - 
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400m_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const 
u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 
salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = 
esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 
(block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], 
MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - 
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, 
w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - 
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, 
c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = 
esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP 
(MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = 
d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400m_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = 
esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - 
esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . 
$pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, 
d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], 
MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP 
(MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, 
w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, 
b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], 
MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, 
b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400s_0_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, 
const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] 
= esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = 
esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 
(block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], 
MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - 
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - 
MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, 
c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, 
w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, 
MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400s_0_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const 
gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = 
esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = 
esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . 
$pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, 
b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, 
MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - 
MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, 
a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], 
MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - 
MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400s_1_0 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const 
u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 
salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = 
esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . $pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 
(block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], 
MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - 
MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, 
w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, 
MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - /* - * final = md5 ($HA1 . $esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - 
MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, 
c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = 
esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP 
(MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = 
d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -__device__ static void m11400s_1_1 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * salt - */ - - const u32 salt_len = esalt_bufs[salt_pos].salt_len; // not a bug, we need to get it from the esalt - - const u32 pw_salt_len = salt_len + pw_len; - - u32 salt_buf0[16]; - - salt_buf0[ 0] = esalt_bufs[salt_pos].salt_buf[ 0]; - salt_buf0[ 1] = esalt_bufs[salt_pos].salt_buf[ 1]; - salt_buf0[ 2] = esalt_bufs[salt_pos].salt_buf[ 2]; - salt_buf0[ 3] = esalt_bufs[salt_pos].salt_buf[ 3]; - salt_buf0[ 4] = esalt_bufs[salt_pos].salt_buf[ 4]; - salt_buf0[ 5] = esalt_bufs[salt_pos].salt_buf[ 5]; - salt_buf0[ 6] = esalt_bufs[salt_pos].salt_buf[ 6]; - salt_buf0[ 7] = esalt_bufs[salt_pos].salt_buf[ 7]; - salt_buf0[ 8] = esalt_bufs[salt_pos].salt_buf[ 8]; - salt_buf0[ 9] = 
esalt_bufs[salt_pos].salt_buf[ 9]; - salt_buf0[10] = esalt_bufs[salt_pos].salt_buf[10]; - salt_buf0[11] = esalt_bufs[salt_pos].salt_buf[11]; - salt_buf0[12] = esalt_bufs[salt_pos].salt_buf[12]; - salt_buf0[13] = esalt_bufs[salt_pos].salt_buf[13]; - salt_buf0[14] = esalt_bufs[salt_pos].salt_buf[14]; - salt_buf0[15] = esalt_bufs[salt_pos].salt_buf[15]; - - u32 salt_buf1[16]; - - salt_buf1[ 0] = esalt_bufs[salt_pos].salt_buf[16]; - salt_buf1[ 1] = esalt_bufs[salt_pos].salt_buf[17]; - salt_buf1[ 2] = esalt_bufs[salt_pos].salt_buf[18]; - salt_buf1[ 3] = esalt_bufs[salt_pos].salt_buf[19]; - salt_buf1[ 4] = esalt_bufs[salt_pos].salt_buf[20]; - salt_buf1[ 5] = esalt_bufs[salt_pos].salt_buf[21]; - salt_buf1[ 6] = esalt_bufs[salt_pos].salt_buf[22]; - salt_buf1[ 7] = esalt_bufs[salt_pos].salt_buf[23]; - salt_buf1[ 8] = esalt_bufs[salt_pos].salt_buf[24]; - salt_buf1[ 9] = esalt_bufs[salt_pos].salt_buf[25]; - salt_buf1[10] = esalt_bufs[salt_pos].salt_buf[26]; - salt_buf1[11] = esalt_bufs[salt_pos].salt_buf[27]; - salt_buf1[12] = esalt_bufs[salt_pos].salt_buf[28]; - salt_buf1[13] = esalt_bufs[salt_pos].salt_buf[29]; - salt_buf1[14] = 0; - salt_buf1[15] = 0; - - /** - * esalt - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - - u32 esalt_buf0[16]; - - esalt_buf0[ 0] = esalt_bufs[salt_pos].esalt_buf[ 0]; - esalt_buf0[ 1] = esalt_bufs[salt_pos].esalt_buf[ 1]; - esalt_buf0[ 2] = esalt_bufs[salt_pos].esalt_buf[ 2]; - esalt_buf0[ 3] = esalt_bufs[salt_pos].esalt_buf[ 3]; - esalt_buf0[ 4] = esalt_bufs[salt_pos].esalt_buf[ 4]; - esalt_buf0[ 5] = esalt_bufs[salt_pos].esalt_buf[ 5]; - esalt_buf0[ 6] = esalt_bufs[salt_pos].esalt_buf[ 6]; - esalt_buf0[ 7] = esalt_bufs[salt_pos].esalt_buf[ 7]; - esalt_buf0[ 8] = esalt_bufs[salt_pos].esalt_buf[ 8]; - esalt_buf0[ 9] = esalt_bufs[salt_pos].esalt_buf[ 9]; - esalt_buf0[10] = esalt_bufs[salt_pos].esalt_buf[10]; - esalt_buf0[11] = esalt_bufs[salt_pos].esalt_buf[11]; - esalt_buf0[12] = esalt_bufs[salt_pos].esalt_buf[12]; - 
esalt_buf0[13] = esalt_bufs[salt_pos].esalt_buf[13]; - esalt_buf0[14] = esalt_bufs[salt_pos].esalt_buf[14]; - esalt_buf0[15] = esalt_bufs[salt_pos].esalt_buf[15]; - - u32 esalt_buf1[16]; - - esalt_buf1[ 0] = esalt_bufs[salt_pos].esalt_buf[16]; - esalt_buf1[ 1] = esalt_bufs[salt_pos].esalt_buf[17]; - esalt_buf1[ 2] = esalt_bufs[salt_pos].esalt_buf[18]; - esalt_buf1[ 3] = esalt_bufs[salt_pos].esalt_buf[19]; - esalt_buf1[ 4] = esalt_bufs[salt_pos].esalt_buf[20]; - esalt_buf1[ 5] = esalt_bufs[salt_pos].esalt_buf[21]; - esalt_buf1[ 6] = esalt_bufs[salt_pos].esalt_buf[22]; - esalt_buf1[ 7] = esalt_bufs[salt_pos].esalt_buf[23]; - esalt_buf1[ 8] = esalt_bufs[salt_pos].esalt_buf[24]; - esalt_buf1[ 9] = esalt_bufs[salt_pos].esalt_buf[25]; - esalt_buf1[10] = esalt_bufs[salt_pos].esalt_buf[26]; - esalt_buf1[11] = esalt_bufs[salt_pos].esalt_buf[27]; - esalt_buf1[12] = esalt_bufs[salt_pos].esalt_buf[28]; - esalt_buf1[13] = esalt_bufs[salt_pos].esalt_buf[29]; - esalt_buf1[14] = esalt_bufs[salt_pos].esalt_buf[30]; - esalt_buf1[15] = esalt_bufs[salt_pos].esalt_buf[31]; - - u32 esalt_buf2[16]; - - esalt_buf2[ 0] = esalt_bufs[salt_pos].esalt_buf[32]; - esalt_buf2[ 1] = esalt_bufs[salt_pos].esalt_buf[33]; - esalt_buf2[ 2] = esalt_bufs[salt_pos].esalt_buf[34]; - esalt_buf2[ 3] = esalt_bufs[salt_pos].esalt_buf[35]; - esalt_buf2[ 4] = esalt_bufs[salt_pos].esalt_buf[36]; - esalt_buf2[ 5] = esalt_bufs[salt_pos].esalt_buf[37]; - esalt_buf2[ 6] = 0; - esalt_buf2[ 7] = 0; - esalt_buf2[ 8] = 0; - esalt_buf2[ 9] = 0; - esalt_buf2[10] = 0; - esalt_buf2[11] = 0; - esalt_buf2[12] = 0; - esalt_buf2[13] = 0; - esalt_buf2[14] = 0; - esalt_buf2[15] = 0; - - const u32 digest_esalt_len = 32 + esalt_len; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /* - * HA1 = md5 ($salt . 
$pass) - */ - - // append the pass to the salt - - u32x block0[16]; - - block0[ 0] = salt_buf0[ 0]; - block0[ 1] = salt_buf0[ 1]; - block0[ 2] = salt_buf0[ 2]; - block0[ 3] = salt_buf0[ 3]; - block0[ 4] = salt_buf0[ 4]; - block0[ 5] = salt_buf0[ 5]; - block0[ 6] = salt_buf0[ 6]; - block0[ 7] = salt_buf0[ 7]; - block0[ 8] = salt_buf0[ 8]; - block0[ 9] = salt_buf0[ 9]; - block0[10] = salt_buf0[10]; - block0[11] = salt_buf0[11]; - block0[12] = salt_buf0[12]; - block0[13] = salt_buf0[13]; - block0[14] = salt_buf0[14]; - block0[15] = salt_buf0[15]; - - u32x block1[16]; - - block1[ 0] = salt_buf1[ 0]; - block1[ 1] = salt_buf1[ 1]; - block1[ 2] = salt_buf1[ 2]; - block1[ 3] = salt_buf1[ 3]; - block1[ 4] = salt_buf1[ 4]; - block1[ 5] = salt_buf1[ 5]; - block1[ 6] = salt_buf1[ 6]; - block1[ 7] = salt_buf1[ 7]; - block1[ 8] = salt_buf1[ 8]; - block1[ 9] = salt_buf1[ 9]; - block1[10] = salt_buf1[10]; - block1[11] = salt_buf1[11]; - block1[12] = salt_buf1[12]; - block1[13] = salt_buf1[13]; - block1[14] = salt_buf1[14]; - block1[15] = salt_buf1[15]; - - memcat32 (block0, block1, salt_len, w0, w1, w2, w3, pw_len); - - u32x w0_t[4]; - - w0_t[0] = block0[ 0]; - w0_t[1] = block0[ 1]; - w0_t[2] = block0[ 2]; - w0_t[3] = block0[ 3]; - - u32x w1_t[4]; - - w1_t[0] = block0[ 4]; - w1_t[1] = block0[ 5]; - w1_t[2] = block0[ 6]; - w1_t[3] = block0[ 7]; - - u32x w2_t[4]; - - w2_t[0] = block0[ 8]; - w2_t[1] = block0[ 9]; - w2_t[2] = block0[10]; - w2_t[3] = block0[11]; - - u32x w3_t[4]; - - w3_t[0] = block0[12]; - w3_t[1] = block0[13]; - w3_t[2] = block0[14]; - w3_t[3] = block0[15]; - - // md5 - - u32x tmp2; - - u32x a = MD5M_A; - u32x b = MD5M_B; - u32x c = MD5M_C; - u32x d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, 
d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], 
MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - u32x r_a = a; - u32x r_b = b; - u32x r_c = c; - u32x r_d = d; - - w0_t[0] = block1[ 0]; - w0_t[1] = block1[ 1]; - w0_t[2] = block1[ 2]; - w0_t[3] = block1[ 3]; - - w1_t[0] = block1[ 4]; - w1_t[1] = block1[ 5]; - w1_t[2] = block1[ 6]; - w1_t[3] = block1[ 7]; - - w2_t[0] = block1[ 8]; - w2_t[1] = block1[ 9]; - w2_t[2] = block1[10]; - w2_t[3] = block1[11]; - - w3_t[0] = block1[12]; - w3_t[1] = block1[13]; - w3_t[2] = pw_salt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP 
(MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, 
w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - /* - * final = md5 ($HA1 . 
$esalt) - * we have at least 2 MD5 blocks/transformations, but we might need 3 - */ - - w0_t[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 - | uint_to_hex_lower8 ((a >> 8) & 255) << 16; - w0_t[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 - | uint_to_hex_lower8 ((a >> 24) & 255) << 16; - w0_t[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 - | uint_to_hex_lower8 ((b >> 8) & 255) << 16; - w0_t[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 - | uint_to_hex_lower8 ((b >> 24) & 255) << 16; - w1_t[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 - | uint_to_hex_lower8 ((c >> 8) & 255) << 16; - w1_t[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 - | uint_to_hex_lower8 ((c >> 24) & 255) << 16; - w1_t[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 - | uint_to_hex_lower8 ((d >> 8) & 255) << 16; - w1_t[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 - | uint_to_hex_lower8 ((d >> 24) & 255) << 16; - - w2_t[0] = esalt_buf0[0]; - w2_t[1] = esalt_buf0[1]; - w2_t[2] = esalt_buf0[2]; - w2_t[3] = esalt_buf0[3]; - - w3_t[0] = esalt_buf0[4]; - w3_t[1] = esalt_buf0[5]; - w3_t[2] = esalt_buf0[6]; - w3_t[3] = esalt_buf0[7]; - - // md5 - // 1st transform - - a = MD5M_A; - b = MD5M_B; - c = MD5M_C; - d = MD5M_D; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], 
MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - 
MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += MD5M_A; - b += MD5M_B; - c += MD5M_C; - d += MD5M_D; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - // 2nd transform - - w0_t[0] = esalt_buf0[ 8]; - w0_t[1] = esalt_buf0[ 9]; - w0_t[2] = esalt_buf0[10]; - w0_t[3] = esalt_buf0[11]; - - w1_t[0] = esalt_buf0[12]; - w1_t[1] = esalt_buf0[13]; - w1_t[2] = esalt_buf0[14]; - w1_t[3] = esalt_buf0[15]; - - w2_t[0] = esalt_buf1[ 0]; - w2_t[1] = esalt_buf1[ 1]; - w2_t[2] = esalt_buf1[ 2]; - w2_t[3] = esalt_buf1[ 3]; - - w3_t[0] = esalt_buf1[ 4]; - w3_t[1] = esalt_buf1[ 5]; - w3_t[2] = esalt_buf1[ 6]; - w3_t[3] = esalt_buf1[ 7]; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, 
b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], 
MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - // this is for sure the final block - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - r_a = a; - r_b = b; - r_c = c; - r_d = d; - - w0_t[0] = esalt_buf1[ 8]; - w0_t[1] = esalt_buf1[ 9]; - w0_t[2] = esalt_buf1[10]; - w0_t[3] = esalt_buf1[11]; - - w1_t[0] = esalt_buf1[12]; - w1_t[1] = esalt_buf1[13]; - w1_t[2] = esalt_buf1[14]; - w1_t[3] = esalt_buf1[15]; - - w2_t[0] = esalt_buf2[ 0]; - w2_t[1] = esalt_buf2[ 1]; - w2_t[2] = esalt_buf2[ 2]; - w2_t[3] = esalt_buf2[ 3]; - - w3_t[0] = esalt_buf2[ 4]; - w3_t[1] = esalt_buf2[ 5]; - w3_t[2] = digest_esalt_len * 8; - w3_t[3] = 0; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t[0], MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w0_t[1], MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w0_t[2], MD5C02, MD5S02); - 
MD5_STEP (MD5_Fo, b, c, d, a, w0_t[3], MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w1_t[0], MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t[1], MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w1_t[2], MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w1_t[3], MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w2_t[0], MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w2_t[1], MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t[2], MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w2_t[3], MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w3_t[0], MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w3_t[1], MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w3_t[2], MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t[3], MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w0_t[1], MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w1_t[2], MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w2_t[3], MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t[0], MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w1_t[1], MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t[2], MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t[3], MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w1_t[0], MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w2_t[1], MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w3_t[2], MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w0_t[3], MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w2_t[0], MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w3_t[1], MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w0_t[2], MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w1_t[3], MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w3_t[0], MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w1_t[1], MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w2_t[0], MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w2_t[3], MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w3_t[2], MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w0_t[1], MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, 
b, c, w1_t[0], MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w1_t[3], MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t[2], MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w3_t[1], MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t[0], MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w0_t[3], MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w1_t[2], MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w2_t[1], MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w3_t[0], MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t[3], MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w0_t[2], MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t[0], MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w1_t[3], MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w3_t[2], MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t[1], MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w3_t[0], MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w0_t[3], MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t[2], MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w0_t[1], MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w2_t[0], MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t[3], MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w1_t[2], MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w3_t[1], MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w1_t[0], MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w2_t[3], MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w0_t[2], MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w2_t[1], MD5C3f, MD5S33); - - a += r_a; - b += r_b; - c += r_c; - d += r_d; - - const u32x r0 = a; - const u32x r1 = d; - const u32x r2 = c; - const u32x r3 = b; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const 
u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - switch (sw_1 | sw_2) - { - case 0: - m11400m_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400m_0_1 (w0, w1, 
w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400m_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400m_1_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const 
u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - switch (sw_1 | sw_2) - { - case 0: - m11400m_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400m_0_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, 
bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400m_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400m_1_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - 
w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - switch (sw_1 | sw_2) - { - case 0: - m11400m_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400m_0_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400m_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, 
bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400m_1_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - 
w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - switch (sw_1 | sw_2) - { - case 0: - m11400s_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400s_0_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400s_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400s_1_1 (w0, w1, w2, w3, pw_len, pws, 
rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = pws[gid].i[14]; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * 
main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - switch (sw_1 | sw_2) - { - case 0: - m11400s_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400s_0_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400s_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400s_1_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, 
salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11400_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const sip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - const u32 esalt_len = esalt_bufs[salt_pos].esalt_len; - const u32 salt_len = esalt_bufs[salt_pos].salt_len; - - const u32 sw_1 = ((32 + esalt_len + 1) > 119); - const u32 sw_2 = ((pw_len + salt_len) > 55) << 1; - - 
switch (sw_1 | sw_2) - { - case 0: - m11400s_0_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 1: - m11400s_0_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 2: - m11400s_1_0 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - case 3: - m11400s_1_1 (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); - break; - } -} diff --git a/nv/m11500_a0.cu b/nv/m11500_a0.cu deleted file mode 100644 index 231a1ff..0000000 --- a/nv/m11500_a0.cu +++ /dev/null @@ -1,366 +0,0 @@ 
-/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _CRC32_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ const u32 crc32tab[0x100] = -{ - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 
0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 
0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -__device__ static u32x round_crc32 (u32x a, const u32x v) -{ - const u32x k = (a ^ v) & 0xff; - - const u32x s = a >> 8; - - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - #endif - - #ifdef VECT_SIZE4 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - a.z = crc32tab[k.z]; - a.w = crc32tab[k.w]; - #endif - - a ^= s; - - return a; -} - -__device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) -{ - u32x a = iv ^ ~0; - - if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) - { - if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24); - } - - return ~a; -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 
combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32x a = crc32 (w_t, out_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 
*d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = 
threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - u32x w_t[16]; - - w_t[ 0] = w0[0]; - w_t[ 1] = w0[1]; - w_t[ 2] = w0[2]; - w_t[ 3] = w0[3]; - w_t[ 4] = w1[0]; - w_t[ 5] = w1[1]; - w_t[ 6] = w1[2]; - w_t[ 7] = w1[3]; - w_t[ 8] = 0; - w_t[ 9] = 0; - w_t[10] = 0; - w_t[11] = 0; - w_t[12] = 0; - w_t[13] = 0; - w_t[14] = 0; - w_t[15] = 0; - - u32x a = crc32 (w_t, out_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11500_a1.cu b/nv/m11500_a1.cu deleted file mode 100644 index e3afdd8..0000000 --- a/nv/m11500_a1.cu +++ /dev/null @@ -1,444 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _CRC32_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 
-#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -__device__ const u32 crc32tab[0x100] = -{ - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -__device__ static u32x round_crc32 (u32x a, const u32x v) -{ - const u32x k = (a ^ v) & 0xff; - - const u32x s = a >> 8; - - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - #endif - - #ifdef VECT_SIZE4 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - a.z = crc32tab[k.z]; - a.w = crc32tab[k.w]; - #endif - - a ^= s; - - return a; -} - -__device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) -{ - u32x a 
= iv ^ ~0; - - if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) - { - if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24); - } - - return ~a; -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - 
wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w_t[16]; - - w_t[ 0] = wordl0[0] | wordr0[0]; - w_t[ 1] = wordl0[1] | wordr0[1]; - w_t[ 2] = wordl0[2] | wordr0[2]; - w_t[ 3] = wordl0[3] | wordr0[3]; - w_t[ 4] = wordl1[0] | wordr1[0]; - w_t[ 5] = wordl1[1] | wordr1[1]; - w_t[ 6] = wordl1[2] | wordr1[2]; - w_t[ 7] = wordl1[3] | wordr1[3]; - w_t[ 8] = wordl2[0] | wordr2[0]; - w_t[ 9] = wordl2[1] | wordr2[1]; - w_t[10] = wordl2[2] | wordr2[2]; - w_t[11] = wordl2[3] | wordr2[3]; - w_t[12] = wordl3[0] | wordr3[0]; - w_t[13] = wordl3[1] | wordr3[1]; - w_t[14] = wordl3[2] | wordr3[2]; - w_t[15] = 0; - - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t 
*bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; 
- - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w_t[16]; - - w_t[ 0] = wordl0[0] | wordr0[0]; - w_t[ 1] = wordl0[1] | wordr0[1]; - w_t[ 2] = wordl0[2] | wordr0[2]; - w_t[ 3] = wordl0[3] | wordr0[3]; - w_t[ 4] = wordl1[0] | wordr1[0]; - w_t[ 5] = wordl1[1] | wordr1[1]; - w_t[ 6] = wordl1[2] | wordr1[2]; - w_t[ 7] = wordl1[3] | wordr1[3]; - w_t[ 8] = wordl2[0] | wordr2[0]; - w_t[ 9] = wordl2[1] | wordr2[1]; - w_t[10] = wordl2[2] | wordr2[2]; - w_t[11] = wordl2[3] | wordr2[3]; - w_t[12] = wordl3[0] | wordr3[0]; - w_t[13] = wordl3[1] | wordr3[1]; - w_t[14] = wordl3[2] | wordr3[2]; - w_t[15] = 0; - - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const 
bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11500_a3.cu b/nv/m11500_a3.cu deleted file mode 100644 index 8427626..0000000 --- a/nv/m11500_a3.cu +++ /dev/null @@ -1,516 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _CRC32_ -#define _SCALAR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE4 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4_warp.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4_warp.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4_warp.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4_warp.c" -#endif - -__device__ const u32 crc32tab[0x100] = -{ - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 
0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 
0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -__device__ static u32x round_crc32 (u32x a, const u32x v) -{ - const u32x k = (a ^ v) & 0xff; - - const u32x s = a >> 8; - - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - #endif - - #ifdef VECT_SIZE4 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - a.z = crc32tab[k.z]; - a.w = crc32tab[k.w]; - #endif - - a ^= s; - - return a; -} - -__device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) -{ - u32x a = iv ^ ~0; - - if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) - { - if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24); - } - - return ~a; -} - -__device__ __constant__ u32x 
c_bfs[1024]; - -__device__ static void m11500m (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0; - w_t[ 1] = w[ 1]; - w_t[ 2] = w[ 2]; - w_t[ 3] = w[ 3]; - w_t[ 4] = w[ 4]; - w_t[ 5] = w[ 5]; - w_t[ 6] = w[ 6]; - w_t[ 7] = w[ 7]; - w_t[ 8] = w[ 8]; - w_t[ 9] = w[ 9]; - w_t[10] = w[10]; - w_t[11] = w[11]; - w_t[12] = w[12]; - w_t[13] = w[13]; - w_t[14] = w[14]; - w_t[15] = w[15]; - - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_M - } -} - -__device__ static void m11500s (u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 
*bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 iv = salt_bufs[salt_pos].salt_buf[0]; - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - const u32 bf_loops = ceil ((float) bfs_cnt / VECT_DIV); - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bf_loops; il_pos++) - { - const u32x w0r = c_bfs[il_pos]; - - const u32x w0 = w0l | w0r; - - u32x w_t[16]; - - w_t[ 0] = w0; - w_t[ 1] = w[ 1]; - w_t[ 2] = w[ 2]; - w_t[ 3] = w[ 3]; - w_t[ 4] = w[ 4]; - w_t[ 5] = w[ 5]; - w_t[ 6] = w[ 6]; - w_t[ 7] = w[ 7]; - w_t[ 8] = w[ 8]; - w_t[ 9] = w[ 9]; - w_t[10] = w[10]; - w_t[11] = w[11]; - w_t[12] = w[12]; - w_t[13] = w[13]; - w_t[14] = w[14]; - w_t[15] = w[15]; - - u32x a = crc32 (w_t, pw_len, iv); - u32x b = 0; - - const u32x r0 = a; - const u32x r1 = b; - const u32x r2 = 0; - const u32x r3 = 0; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 
*bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const 
u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = 
(blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = 
pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 
pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11500_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const u32x *words_buf_r, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * main - */ - - m11500s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, 
bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11600.cu b/nv/m11600.cu deleted file mode 100644 index 3880034..0000000 --- a/nv/m11600.cu +++ /dev/null @@ -1,1923 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SEVEN_ZIP_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 
0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 
0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 
0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 
0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 
0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 
0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 
0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 
0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 
0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 
0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 
0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 
0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 
0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 
0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 
0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 
0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = 
userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - u32 run = 1; - - while (run) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) - { - run = 0; - continue; - } - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; 
- - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 
0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ 
s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 
16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ __constant__ u32 k_sha256[64] = -{ - SHA256C00, SHA256C01, SHA256C02, SHA256C03, - SHA256C04, SHA256C05, SHA256C06, SHA256C07, - SHA256C08, SHA256C09, SHA256C0a, SHA256C0b, - SHA256C0c, SHA256C0d, SHA256C0e, SHA256C0f, - SHA256C10, SHA256C11, SHA256C12, SHA256C13, - SHA256C14, SHA256C15, SHA256C16, SHA256C17, - SHA256C18, SHA256C19, SHA256C1a, SHA256C1b, - SHA256C1c, SHA256C1d, SHA256C1e, SHA256C1f, - SHA256C20, SHA256C21, SHA256C22, SHA256C23, - SHA256C24, SHA256C25, SHA256C26, SHA256C27, - SHA256C28, SHA256C29, SHA256C2a, SHA256C2b, - SHA256C2c, SHA256C2d, SHA256C2e, SHA256C2f, - SHA256C30, SHA256C31, SHA256C32, SHA256C33, - SHA256C34, SHA256C35, SHA256C36, SHA256C37, - SHA256C38, SHA256C39, SHA256C3a, SHA256C3b, - SHA256C3c, SHA256C3d, SHA256C3e, SHA256C3f, -}; - -__device__ static void sha256_transform (const u32x w[16], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = 
digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = swap_workaround (w[ 0]); - u32x w1_t = swap_workaround (w[ 1]); - u32x w2_t = swap_workaround (w[ 2]); - u32x w3_t = swap_workaround (w[ 3]); - u32x w4_t = swap_workaround (w[ 4]); - u32x w5_t = swap_workaround (w[ 5]); - u32x w6_t = swap_workaround (w[ 6]); - u32x w7_t = swap_workaround (w[ 7]); - u32x w8_t = swap_workaround (w[ 8]); - u32x w9_t = swap_workaround (w[ 9]); - u32x wa_t = swap_workaround (w[10]); - u32x wb_t = swap_workaround (w[11]); - u32x wc_t = swap_workaround (w[12]); - u32x wd_t = swap_workaround (w[13]); - u32x we_t = swap_workaround (w[14]); - u32x wf_t = swap_workaround (w[15]); - - #define ROUND_EXPAND() \ - { \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ - } - - #define ROUND_STEP(i) \ - { \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ - } - - ROUND_STEP (0); - - for (int i = 16; i < 64; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ const u32 crc32tab[0x100] = -{ - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 
0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 
0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -__device__ static u32x round_crc32 (u32x a, const u32x v) -{ - const u32x k = (a ^ v) & 0xff; - - const u32x s = a >> 8; - - #ifdef VECT_SIZE1 - a = crc32tab[k]; - #endif - - #ifdef VECT_SIZE2 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - #endif - - #ifdef VECT_SIZE4 - a.x = crc32tab[k.x]; - a.y = crc32tab[k.y]; - a.z = crc32tab[k.z]; - a.w = crc32tab[k.w]; - #endif - - a ^= s; - - return a; -} - -__device__ static u32x crc32 (const u32x w[16], const u32 pw_len, const u32 iv) -{ - u32x a = iv ^ ~0; - - if (pw_len >= 1) a = round_crc32 (a, w[0] >> 0); - if (pw_len >= 2) a = round_crc32 (a, w[0] >> 8); - if (pw_len >= 3) a = round_crc32 (a, w[0] >> 16); - if (pw_len >= 4) a = round_crc32 (a, w[0] >> 24); - - for (u32 i = 4, j = 1; i < pw_len; i += 4, j += 1) - { - if (pw_len >= (i + 1)) a = round_crc32 (a, w[j] >> 0); - if (pw_len >= (i + 2)) a = round_crc32 (a, w[j] >> 8); - if (pw_len >= (i + 3)) a = round_crc32 (a, w[j] >> 16); - if (pw_len >= (i + 4)) a = round_crc32 (a, w[j] >> 24); - } - - return ~a; -} - -__device__ static void bzero16 (u32x block[16]) -{ - block[ 0] = 0; - block[ 1] = 0; - block[ 2] = 0; - block[ 3] = 0; - block[ 4] = 0; - block[ 5] = 0; - block[ 6] = 0; - block[ 7] = 0; - block[ 8] = 0; - block[ 9] = 0; - block[10] = 0; - block[11] = 0; - block[12] = 0; - block[13] = 0; - block[14] = 0; - block[15] = 0; -} - -__device__ static u32 memcat8c (u32x block[16], const u32 
block_len, const u32x append[4], const u32 append_len, u32x digest[8]) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], 0, selector); - - #endif - - u32x carry[2] = { 0, 0 }; - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - break; - case 5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - break; - case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - break; - case 14: block[14] |= tmp0; - block[15] = tmp1; - carry[ 0] = tmp2; - break; - case 15: block[15] |= tmp0; - carry[ 0] = tmp1; - carry[ 1] = tmp2; - break; - } - - u32 new_len = block_len + append_len; - - if (new_len >= 64) - { - new_len -= 64; - - sha256_transform (block, digest); - - bzero16 (block); - - block[0] = carry[0]; - block[1] = carry[1]; - } - - return new_len; -} - 
-__device__ static u32 memcat32c (u32x block[16], const u32 block_len, const u32x append[4], const u32 append_len, u32x digest[8]) -{ - const u32 div = block_len / 4; - - u32x tmp0; - u32x tmp1; - u32x tmp2; - u32x tmp3; - u32x tmp4; - u32x tmp5; - u32x tmp6; - u32x tmp7; - u32x tmp8; - - #if __CUDA_ARCH__ >= 200 - - const int offset_minus_4 = 4 - (block_len & 3); - - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - - tmp0 = __byte_perm ( 0, append[0], selector); - tmp1 = __byte_perm (append[0], append[1], selector); - tmp2 = __byte_perm (append[1], append[2], selector); - tmp3 = __byte_perm (append[2], append[3], selector); - tmp4 = __byte_perm (append[3], append[4], selector); - tmp5 = __byte_perm (append[4], append[5], selector); - tmp6 = __byte_perm (append[5], append[6], selector); - tmp7 = __byte_perm (append[6], append[7], selector); - tmp8 = __byte_perm (append[7], 0, selector); - - #endif - - u32x carry[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - - switch (div) - { - case 0: block[ 0] |= tmp0; - block[ 1] = tmp1; - block[ 2] = tmp2; - block[ 3] = tmp3; - block[ 4] = tmp4; - block[ 5] = tmp5; - block[ 6] = tmp6; - block[ 7] = tmp7; - block[ 8] = tmp8; - break; - case 1: block[ 1] |= tmp0; - block[ 2] = tmp1; - block[ 3] = tmp2; - block[ 4] = tmp3; - block[ 5] = tmp4; - block[ 6] = tmp5; - block[ 7] = tmp6; - block[ 8] = tmp7; - block[ 9] = tmp8; - break; - case 2: block[ 2] |= tmp0; - block[ 3] = tmp1; - block[ 4] = tmp2; - block[ 5] = tmp3; - block[ 6] = tmp4; - block[ 7] = tmp5; - block[ 8] = tmp6; - block[ 9] = tmp7; - block[10] = tmp8; - break; - case 3: block[ 3] |= tmp0; - block[ 4] = tmp1; - block[ 5] = tmp2; - block[ 6] = tmp3; - block[ 7] = tmp4; - block[ 8] = tmp5; - block[ 9] = tmp6; - block[10] = tmp7; - block[11] = tmp8; - break; - case 4: block[ 4] |= tmp0; - block[ 5] = tmp1; - block[ 6] = tmp2; - block[ 7] = tmp3; - block[ 8] = tmp4; - block[ 9] = tmp5; - block[10] = tmp6; - block[11] = tmp7; - block[12] = tmp8; - break; - case 
5: block[ 5] |= tmp0; - block[ 6] = tmp1; - block[ 7] = tmp2; - block[ 8] = tmp3; - block[ 9] = tmp4; - block[10] = tmp5; - block[11] = tmp6; - block[12] = tmp7; - block[13] = tmp8; - break; - case 6: block[ 6] |= tmp0; - block[ 7] = tmp1; - block[ 8] = tmp2; - block[ 9] = tmp3; - block[10] = tmp4; - block[11] = tmp5; - block[12] = tmp6; - block[13] = tmp7; - block[14] = tmp8; - break; - case 7: block[ 7] |= tmp0; - block[ 8] = tmp1; - block[ 9] = tmp2; - block[10] = tmp3; - block[11] = tmp4; - block[12] = tmp5; - block[13] = tmp6; - block[14] = tmp7; - block[15] = tmp8; - break; - case 8: block[ 8] |= tmp0; - block[ 9] = tmp1; - block[10] = tmp2; - block[11] = tmp3; - block[12] = tmp4; - block[13] = tmp5; - block[14] = tmp6; - block[15] = tmp7; - carry[ 0] = tmp8; - break; - case 9: block[ 9] |= tmp0; - block[10] = tmp1; - block[11] = tmp2; - block[12] = tmp3; - block[13] = tmp4; - block[14] = tmp5; - block[15] = tmp6; - carry[ 0] = tmp7; - carry[ 1] = tmp8; - break; - case 10: block[10] |= tmp0; - block[11] = tmp1; - block[12] = tmp2; - block[13] = tmp3; - block[14] = tmp4; - block[15] = tmp5; - carry[ 0] = tmp6; - carry[ 1] = tmp7; - carry[ 2] = tmp8; - break; - case 11: block[11] |= tmp0; - block[12] = tmp1; - block[13] = tmp2; - block[14] = tmp3; - block[15] = tmp4; - carry[ 0] = tmp5; - carry[ 1] = tmp6; - carry[ 2] = tmp7; - carry[ 3] = tmp8; - break; - case 12: block[12] |= tmp0; - block[13] = tmp1; - block[14] = tmp2; - block[15] = tmp3; - carry[ 0] = tmp4; - carry[ 1] = tmp5; - carry[ 2] = tmp6; - carry[ 3] = tmp7; - carry[ 4] = tmp8; - break; - case 13: block[13] |= tmp0; - block[14] = tmp1; - block[15] = tmp2; - carry[ 0] = tmp3; - carry[ 1] = tmp4; - carry[ 2] = tmp5; - carry[ 3] = tmp6; - carry[ 4] = tmp7; - carry[ 5] = tmp8; - break; - case 14: block[14] |= tmp0; - block[15] = tmp1; - carry[ 0] = tmp2; - carry[ 1] = tmp3; - carry[ 2] = tmp4; - carry[ 3] = tmp5; - carry[ 4] = tmp6; - carry[ 5] = tmp7; - carry[ 6] = tmp8; - break; - case 15: block[15] 
|= tmp0; - carry[ 0] = tmp1; - carry[ 1] = tmp2; - carry[ 2] = tmp3; - carry[ 3] = tmp4; - carry[ 4] = tmp5; - carry[ 5] = tmp6; - carry[ 6] = tmp7; - carry[ 7] = tmp8; - break; - } - - u32 new_len = block_len + append_len; - - if (new_len >= 64) - { - new_len -= 64; - - sha256_transform (block, digest); - - bzero16 (block); - - block[0] = carry[0]; - block[1] = carry[1]; - block[2] = carry[2]; - block[3] = carry[3]; - block[4] = carry[4]; - block[5] = carry[5]; - block[6] = carry[6]; - block[7] = carry[7]; - } - - return new_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11600_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, seven_zip_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const seven_zip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - /** - * algo starts here already - */ - - u32x dgst[8]; - - dgst[0] = SHA256M_A; - dgst[1] = SHA256M_B; - dgst[2] = SHA256M_C; - dgst[3] = SHA256M_D; - dgst[4] = SHA256M_E; - dgst[5] = SHA256M_F; - dgst[6] = SHA256M_G; - dgst[7] = SHA256M_H; - - u32x block[16]; - - bzero16 (block); - - u32 block_len = 0; - u32 final_len = 0; - - /** - * context save - */ - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - 
tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].block[ 0] = block[ 0]; - tmps[gid].block[ 1] = block[ 1]; - tmps[gid].block[ 2] = block[ 2]; - tmps[gid].block[ 3] = block[ 3]; - tmps[gid].block[ 4] = block[ 4]; - tmps[gid].block[ 5] = block[ 5]; - tmps[gid].block[ 6] = block[ 6]; - tmps[gid].block[ 7] = block[ 7]; - tmps[gid].block[ 8] = block[ 8]; - tmps[gid].block[ 9] = block[ 9]; - tmps[gid].block[10] = block[10]; - tmps[gid].block[11] = block[11]; - tmps[gid].block[12] = block[12]; - tmps[gid].block[13] = block[13]; - tmps[gid].block[14] = block[14]; - tmps[gid].block[15] = block[15]; - - tmps[gid].block_len = block_len; - tmps[gid].final_len = final_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11600_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, seven_zip_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const seven_zip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw[8]; - - pw[0] = pws[gid].i[ 0]; - pw[1] = pws[gid].i[ 1]; - pw[2] = pws[gid].i[ 2]; - pw[3] = pws[gid].i[ 3]; - pw[4] = 0; - pw[5] = 0; - pw[6] = 0; - pw[7] = 0; - - u32 pw_len = pws[gid].pw_len; - - make_unicode (&pw[0], &pw[0], &pw[4]); - - pw_len *= 2; - - /** - * 
context load - */ - - u32x dgst[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - u32x block[16]; - - block[ 0] = tmps[gid].block[ 0]; - block[ 1] = tmps[gid].block[ 1]; - block[ 2] = tmps[gid].block[ 2]; - block[ 3] = tmps[gid].block[ 3]; - block[ 4] = tmps[gid].block[ 4]; - block[ 5] = tmps[gid].block[ 5]; - block[ 6] = tmps[gid].block[ 6]; - block[ 7] = tmps[gid].block[ 7]; - block[ 8] = tmps[gid].block[ 8]; - block[ 9] = tmps[gid].block[ 9]; - block[10] = tmps[gid].block[10]; - block[11] = tmps[gid].block[11]; - block[12] = tmps[gid].block[12]; - block[13] = tmps[gid].block[13]; - block[14] = tmps[gid].block[14]; - block[15] = tmps[gid].block[15]; - - u32 block_len = tmps[gid].block_len; - u32 final_len = tmps[gid].final_len; - - /** - * base - */ - - for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++) - { - u32 it[2]; - - it[0] = j; - it[1] = 0; - - block_len = memcat32c (block, block_len, pw, pw_len, dgst); final_len += pw_len; - block_len = memcat8c (block, block_len, it, 8, dgst); final_len += 8; - } - - /** - * context save - */ - - tmps[gid].dgst[0] = dgst[0]; - tmps[gid].dgst[1] = dgst[1]; - tmps[gid].dgst[2] = dgst[2]; - tmps[gid].dgst[3] = dgst[3]; - tmps[gid].dgst[4] = dgst[4]; - tmps[gid].dgst[5] = dgst[5]; - tmps[gid].dgst[6] = dgst[6]; - tmps[gid].dgst[7] = dgst[7]; - - tmps[gid].block[ 0] = block[ 0]; - tmps[gid].block[ 1] = block[ 1]; - tmps[gid].block[ 2] = block[ 2]; - tmps[gid].block[ 3] = block[ 3]; - tmps[gid].block[ 4] = block[ 4]; - tmps[gid].block[ 5] = block[ 5]; - tmps[gid].block[ 6] = block[ 6]; - tmps[gid].block[ 7] = block[ 7]; - tmps[gid].block[ 8] = block[ 8]; - tmps[gid].block[ 9] = block[ 9]; - tmps[gid].block[10] = block[10]; - tmps[gid].block[11] = block[11]; - tmps[gid].block[12] = block[12]; - tmps[gid].block[13] = 
block[13]; - tmps[gid].block[14] = block[14]; - tmps[gid].block[15] = block[15]; - - tmps[gid].block_len = block_len; - tmps[gid].final_len = final_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11600_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, seven_zip_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const seven_zip_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * context load - */ - - u32x dgst[8]; - - dgst[0] = tmps[gid].dgst[0]; - dgst[1] = tmps[gid].dgst[1]; - dgst[2] = tmps[gid].dgst[2]; - dgst[3] = tmps[gid].dgst[3]; - dgst[4] = tmps[gid].dgst[4]; - dgst[5] = tmps[gid].dgst[5]; - dgst[6] = 
tmps[gid].dgst[6]; - dgst[7] = tmps[gid].dgst[7]; - - u32x block[16]; - - block[ 0] = tmps[gid].block[ 0]; - block[ 1] = tmps[gid].block[ 1]; - block[ 2] = tmps[gid].block[ 2]; - block[ 3] = tmps[gid].block[ 3]; - block[ 4] = tmps[gid].block[ 4]; - block[ 5] = tmps[gid].block[ 5]; - block[ 6] = tmps[gid].block[ 6]; - block[ 7] = tmps[gid].block[ 7]; - block[ 8] = tmps[gid].block[ 8]; - block[ 9] = tmps[gid].block[ 9]; - block[10] = tmps[gid].block[10]; - block[11] = tmps[gid].block[11]; - block[12] = tmps[gid].block[12]; - block[13] = tmps[gid].block[13]; - block[14] = tmps[gid].block[14]; - block[15] = tmps[gid].block[15]; - - u32 block_len = tmps[gid].block_len; - u32 final_len = tmps[gid].final_len; - - append_0x80_4 (block, block_len); - - if (block_len >= 56) - { - sha256_transform (block, dgst); - - bzero16 (block); - } - - block[15] = swap_workaround (final_len * 8); - - sha256_transform (block, dgst); - - /** - * final key operations - */ - - u32 iv[4]; - - iv[0] = esalt_bufs[salt_pos].iv_buf[0]; - iv[1] = esalt_bufs[salt_pos].iv_buf[1]; - iv[2] = esalt_bufs[salt_pos].iv_buf[2]; - iv[3] = esalt_bufs[salt_pos].iv_buf[3]; - - u32x ukey[8]; - - ukey[0] = dgst[0]; - ukey[1] = dgst[1]; - ukey[2] = dgst[2]; - ukey[3] = dgst[3]; - ukey[4] = dgst[4]; - ukey[5] = dgst[5]; - ukey[6] = dgst[6]; - ukey[7] = dgst[7]; - - #define KEYLEN 60 - - u32 rk[KEYLEN]; - - AES256_ExpandKey (ukey, rk, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 crc = 0; - - int data_len = esalt_bufs[salt_pos].data_len; - int unpack_size = esalt_bufs[salt_pos].unpack_size; - - int i; - int j; - - for (i = 0, j = 0; i < data_len - 16; i += 16, j += 4) - { - u32 data[4]; - - data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]); - data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]); - data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]); - data[3] = swap_workaround 
(esalt_bufs[salt_pos].data_buf[j + 3]); - - u32 out[4]; - - AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= iv[0]; - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); - - crc = crc32 (out, 16, crc); - } - - u32 data[4]; - - data[0] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 0]); - data[1] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 1]); - data[2] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 2]); - data[3] = swap_workaround (esalt_bufs[salt_pos].data_buf[j + 3]); - - u32 out[4]; - - AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= iv[0]; - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); - - const u32 margin = data_len - unpack_size; - - const u32 left = 16 - margin; - - crc = crc32 (out, left, crc); - - // use padding attack in that case - - if (margin >= 4) - { - switch (margin) - { - case 15: out[0] &= 0xffffff00; - break; - case 14: out[0] &= 0xffff0000; - break; - case 13: out[0] &= 0xff000000; - break; - case 12: out[0] = 0; - break; - case 11: out[0] = 0; - out[1] &= 0xffffff00; - break; - case 10: out[0] = 0; - out[1] &= 0xffff0000; - break; - case 9: out[0] = 0; - out[1] &= 0xff000000; - break; - case 8: out[0] = 0; - out[1] = 0; - break; - case 7: out[0] = 0; - out[1] = 0; - out[2] &= 0xffffff00; - break; - case 6: out[0] = 0; - out[1] = 0; - out[2] &= 0xffff0000; - break; - case 5: out[0] = 0; - out[1] = 0; - out[2] &= 0xff000000; - break; - case 4: out[0] = 0; - out[1] = 0; - out[2] = 0; - break; - case 3: 
out[0] = 0; - out[1] = 0; - out[2] = 0; - out[3] &= 0xffffff00; - break; - case 2: out[0] = 0; - out[1] = 0; - out[2] = 0; - out[3] &= 0xffff0000; - break; - case 1: out[0] = 0; - out[1] = 0; - out[2] = 0; - out[3] &= 0xff000000; - break; - } - - if ((out[0] == 0) && (out[1] == 0) && (out[2] == 0) && (out[3] == 0)) - { - mark_hash_s0 (plains_buf, hashes_shown, digests_offset + 0, gid, 0); - - d_return_buf[lid] = 1; - } - } - - const u32x r0 = crc; - const u32x r1 = 0; - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m11700_a0.cu b/nv/m11700_a0.cu deleted file mode 100644 index 5a69197..0000000 --- a/nv/m11700_a0.cu +++ /dev/null @@ -1,2675 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define INITVAL 0x0101010101010101 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> 
(i * 8)) & 0xff] - -// constants - -__device__ const u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 
0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, - 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, - 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, - 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, - 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, - 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, 
- 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 
0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, - 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, 
- 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, - 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, - 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, - 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, - 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 
0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 
0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, - 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, 
- 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, - 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, - 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, - 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, - 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 
0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 
0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, - 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, 
- 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, - 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, - 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, - 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, - 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 
0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 
0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, - 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, 
- 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, - 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, - 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, - 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, - 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 
0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 
0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, - 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, - 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, - 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, - 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, - 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, 
- 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 
0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 
0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, - 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, - 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, - 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, - 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, - 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, 
- 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 
0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, - 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, 
- 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, - 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, - 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, - 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, - 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 
0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ const u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 
0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - 
- for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - 
s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] = pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] 
= 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[1], pw_len); - - append_0x01_2 (&w[0], &w[1], out_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (out_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - /** - * 
shared lookup table - */ - - - __shared__ u64 s_sbob_sl64[8][256]; - - const u32 lid4 = lid * 4; - - if (lid < 64) - { - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = 
pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] = pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[1], pw_len); - - append_0x01_2 (&w[0], &w[1], out_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] 
= swap_workaround ((u64) (out_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11700_a1.cu 
b/nv/m11700_a1.cu deleted file mode 100644 index c2c5b90..0000000 --- a/nv/m11700_a1.cu +++ /dev/null @@ -1,2783 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define INITVAL 0x0101010101010101 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff] - -// constants - -__device__ __constant__ u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 
0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, - 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, - 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, 
- 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, - 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, - 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, - 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 
0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 
0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, - 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, - 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, - 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, - 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, 
- 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, - 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 
0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 
0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, - 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, - 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, - 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, - 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, 
- 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, - 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 
0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 
0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, - 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, - 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, - 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, - 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, 
- 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, - 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 
0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 
0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, - 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, - 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, - 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, - 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, 
- 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, - 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 
0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, - 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, - 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, 
- 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, - 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, - 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, - 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 
0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 
0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, - 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, - 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, 
- 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, - 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, - 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, - 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 
0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 
0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, - 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, - 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, - 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, - 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, 
- 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, - 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 
0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ __constant__ u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 
0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - - for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m04 (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = 
sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - 
wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[1] | wordr3[1]; - w[15] = wordl3[1] | wordr3[1]; - - append_0x01_4 (&w[0], &w[1], &w[2], &w[3], pw_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = 
l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 
2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = 
c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[1] | wordr3[1]; - w[15] = wordl3[1] | wordr3[1]; - - append_0x01_4 (&w[0], &w[1], &w[2], &w[3], pw_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, 
z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11700_a3.cu b/nv/m11700_a3.cu deleted file mode 100644 index f2175ea..0000000 --- 
a/nv/m11700_a3.cu +++ /dev/null @@ -1,2993 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_256_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define INITVAL 0x0101010101010101 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff] - -// constants - -__device__ __constant__ u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 
0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, - 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, - 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, - 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, 
- 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, - 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, - 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 
0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 
0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, - 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, - 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, - 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, - 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, - 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, 
- 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 
0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 
0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, - 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, - 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, - 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, - 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, - 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, 
- 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 
0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 
0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, - 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, - 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, - 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, - 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, - 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, 
- 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 
0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 
0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, - 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, - 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, - 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, - 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, - 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, 
- 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 
0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, - 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, - 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, - 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, 
- 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, - 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, - 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 
0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 
0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, - 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, - 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, - 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, 
- 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, - 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, - 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 
0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 
0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, - 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, - 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, - 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, - 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, - 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, 
- 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 
0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ __constant__ u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 
0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - - for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m11700m (u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w[0] = w0l | w0r; - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 
= h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m11700s (u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w[0] = w0l | w0r; - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = 
swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup 
table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, 
bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - 
s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - 
-extern "C" __global__ void __launch_bounds__ (256, 1) m11700_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = 
sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 
3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 
2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11700_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 
0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - 
s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11700s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11800_a0.cu b/nv/m11800_a0.cu deleted file mode 100644 index b042907..0000000 --- a/nv/m11800_a0.cu +++ /dev/null @@ -1,2674 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define INITVAL 0 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - 
s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff] - -// constants - -__device__ const u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 
0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, - 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, - 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, - 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, - 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, 
- 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, - 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 
0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, 
- 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, - 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, - 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, - 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, - 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, - 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 
0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 
0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, 
- 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, - 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, - 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, - 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, - 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, - 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 
0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 
0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, 
- 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, - 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, - 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, - 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, - 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, - 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 
0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 
0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, 
- 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, - 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, - 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, - 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, - 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, - 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 
0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 
0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, - 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, - 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, - 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, - 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, 
- 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, - 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 
0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 
0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, - 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, - 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, - 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, - 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, 
- 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, - 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 
0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, 
- 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, - 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, - 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, - 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, - 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, - 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 
0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ const u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 
0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 
k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - - for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = 
sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; 
il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] = pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[1], pw_len); - - append_0x01_2 (&w[0], &w[1], out_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (out_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 
*bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 
+ 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w[16]; - - w[ 0] = pw_buf0[0]; - w[ 1] = pw_buf0[1]; - w[ 2] = pw_buf0[2]; - w[ 3] = pw_buf0[3]; - w[ 4] = pw_buf1[0]; - w[ 5] = pw_buf1[1]; - w[ 6] = pw_buf1[2]; - w[ 7] = pw_buf1[3]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, &w[0], &w[1], pw_len); - - append_0x01_2 (&w[0], &w[1], out_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = 
INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (out_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, 
const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11800_a1.cu b/nv/m11800_a1.cu deleted file mode 100644 index b251a51..0000000 --- a/nv/m11800_a1.cu +++ /dev/null @@ -1,2785 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - - -#define INITVAL 0 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff] - -// constants - -__device__ __constant__ u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 
0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, 
- 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, - 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, - 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, - 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, - 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, - 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 
0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 
0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, - 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, - 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, 
- 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, - 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, - 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, - 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 
0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 
0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, - 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, - 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, 
- 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, - 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, - 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, - 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 
0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 
0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, - 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, - 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, 
- 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, - 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, - 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, - 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 
0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 
0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, - 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, - 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, 
- 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, - 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, - 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, - 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 
0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, 
- 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, - 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, - 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, - 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, - 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, - 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 
0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 
0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, 
- 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, - 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, - 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, - 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, - 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, - 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 
0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 
0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, - 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, - 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, 
- 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, - 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, - 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, - 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 
0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ __constant__ u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 
0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - - for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - 
for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - 
s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = 
c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[1] | wordr3[1]; - w[15] = wordl3[1] | wordr3[1]; - - append_0x01_4 (&w[0], &w[1], &w[2], &w[3], pw_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 
0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - 
-extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = 
sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - 
wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w[16]; - - w[ 0] = wordl0[0] | wordr0[0]; - w[ 1] = wordl0[1] | wordr0[1]; - w[ 2] = wordl0[2] | wordr0[2]; - w[ 3] = wordl0[3] | wordr0[3]; - w[ 4] = wordl1[0] | wordr1[0]; - w[ 5] = wordl1[1] | wordr1[1]; - w[ 6] = wordl1[2] | wordr1[2]; - w[ 7] = wordl1[3] | wordr1[3]; - w[ 8] = wordl2[0] | wordr2[0]; - w[ 9] = wordl2[1] | wordr2[1]; - w[10] = wordl2[2] | wordr2[2]; - w[11] = wordl2[3] | wordr2[3]; - w[12] = wordl3[0] | wordr3[0]; - w[13] = wordl3[1] | wordr3[1]; - w[14] = wordl3[1] | wordr3[1]; - w[15] = wordl3[1] | wordr3[1]; - - append_0x01_4 (&w[0], &w[1], &w[2], &w[3], pw_len); - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] 
= INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const 
u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m11800_a3.cu b/nv/m11800_a3.cu deleted file mode 100644 index 5b6d1f7..0000000 --- a/nv/m11800_a3.cu +++ /dev/null @@ -1,2993 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _GOST2012_512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#define INITVAL 0 - -#define SBOG_LPSti64 \ - s_sbob_sl64[0][(t[0] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[1][(t[1] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[2][(t[2] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[3][(t[3] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[4][(t[4] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[5][(t[5] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[6][(t[6] >> (i * 8)) & 0xff] ^ \ - s_sbob_sl64[7][(t[7] >> (i * 8)) & 0xff] - -// constants - -__device__ __constant__ u64 sbob_sl64[8][256] = -{ - { - 0xd031c397ce553fe6, - 0x16ba5b01b006b525, - 0xa89bade6296e70c8, - 0x6a1f525d77d3435b, - 0x6e103570573dfa0b, - 0x660efb2a17fc95ab, - 0x76327a9e97634bf6, - 0x4bad9d6462458bf5, - 0xf1830caedbc3f748, - 0xc5c8f542669131ff, - 0x95044a1cdc48b0cb, - 0x892962df3cf8b866, - 0xb0b9e208e930c135, - 0xa14fb3f0611a767c, - 0x8d2605f21c160136, - 0xd6b71922fecc549e, - 0x37089438a5907d8b, - 0x0b5da38e5803d49c, - 0x5a5bcc9cea6f3cbc, - 
0xedae246d3b73ffe5, - 0xd2b87e0fde22edce, - 0x5e54abb1ca8185ec, - 0x1de7f88fe80561b9, - 0xad5e1a870135a08c, - 0x2f2adbd665cecc76, - 0x5780b5a782f58358, - 0x3edc8a2eede47b3f, - 0xc9d95c3506bee70f, - 0x83be111d6c4e05ee, - 0xa603b90959367410, - 0x103c81b4809fde5d, - 0x2c69b6027d0c774a, - 0x399080d7d5c87953, - 0x09d41e16487406b4, - 0xcdd63b1826505e5f, - 0xf99dc2f49b0298e8, - 0x9cd0540a943cb67f, - 0xbca84b7f891f17c5, - 0x723d1db3b78df2a6, - 0x78aa6e71e73b4f2e, - 0x1433e699a071670d, - 0x84f21be454620782, - 0x98df3327b4d20f2f, - 0xf049dce2d3769e5c, - 0xdb6c60199656eb7a, - 0x648746b2078b4783, - 0x32cd23598dcbadcf, - 0x1ea4955bf0c7da85, - 0xe9a143401b9d46b5, - 0xfd92a5d9bbec21b8, - 0xc8138c790e0b8e1b, - 0x2ee00b9a6d7ba562, - 0xf85712b893b7f1fc, - 0xeb28fed80bea949d, - 0x564a65eb8a40ea4c, - 0x6c9988e8474a2823, - 0x4535898b121d8f2d, - 0xabd8c03231accbf4, - 0xba2e91cab9867cbd, - 0x7960be3def8e263a, - 0x0c11a977602fd6f0, - 0xcb50e1ad16c93527, - 0xeae22e94035ffd89, - 0x2866d12f5de2ce1a, - 0xff1b1841ab9bf390, - 0x9f9339de8cfe0d43, - 0x964727c8c48a0bf7, - 0x524502c6aaae531c, - 0x9b9c5ef3ac10b413, - 0x4fa2fa4942ab32a5, - 0x3f165a62e551122b, - 0xc74148da76e6e3d7, - 0x924840e5e464b2a7, - 0xd372ae43d69784da, - 0x233b72a105e11a86, - 0xa48a04914941a638, - 0xb4b68525c9de7865, - 0xddeabaaca6cf8002, - 0x0a9773c250b6bd88, - 0xc284ffbb5ebd3393, - 0x8ba0df472c8f6a4e, - 0x2aef6cb74d951c32, - 0x427983722a318d41, - 0x73f7cdffbf389bb2, - 0x074c0af9382c026c, - 0x8a6a0f0b243a035a, - 0x6fdae53c5f88931f, - 0xc68b98967e538ac3, - 0x44ff59c71aa8e639, - 0xe2fce0ce439e9229, - 0xa20cde2479d8cd40, - 0x19e89fa2c8ebd8e9, - 0xf446bbcff398270c, - 0x43b3533e2284e455, - 0xd82f0dcd8e945046, - 0x51066f12b26ce820, - 0xe73957af6bc5426d, - 0x081ece5a40c16fa0, - 0x3b193d4fc5bfab7b, - 0x7fe66488df174d42, - 0x0e9814ef705804d8, - 0x8137ac857c39d7c6, - 0xb1733244e185a821, - 0x695c3f896f11f867, - 0xf6cf0657e3eff524, - 0x1aabf276d02963d5, - 0x2da3664e75b91e5e, - 0x0289bd981077d228, - 0x90c1fd7df413608f, - 0x3c5537b6fd93a917, 
- 0xaa12107e3919a2e0, - 0x0686dab530996b78, - 0xdaa6b0559ee3826e, - 0xc34e2ff756085a87, - 0x6d5358a44fff4137, - 0xfc587595b35948ac, - 0x7ca5095cc7d5f67e, - 0xfb147f6c8b754ac0, - 0xbfeb26ab91ddacf9, - 0x6896efc567a49173, - 0xca9a31e11e7c5c33, - 0xbbe44186b13315a9, - 0x0ddb793b689abfe4, - 0x70b4a02ba7fa208e, - 0xe47a3a7b7307f951, - 0x8cecd5be14a36822, - 0xeeed49b923b144d9, - 0x17708b4db8b3dc31, - 0x6088219f2765fed3, - 0xb3fa8fdcf1f27a09, - 0x910b2d31fca6099b, - 0x0f52c4a378ed6dcc, - 0x50ccbf5ebad98134, - 0x6bd582117f662a4f, - 0x94ce9a50d4fdd9df, - 0x2b25bcfb45207526, - 0x67c42b661f49fcbf, - 0x492420fc723259dd, - 0x03436dd418c2bb3c, - 0x1f6e4517f872b391, - 0xa08563bc69af1f68, - 0xd43ea4baeebb86b6, - 0x01cad04c08b56914, - 0xac94cacb0980c998, - 0x54c3d8739a373864, - 0x26fec5c02dbacac2, - 0xdea9d778be0d3b3e, - 0x040f672d20eeb950, - 0xe5b0ea377bb29045, - 0xf30ab136cbb42560, - 0x62019c0737122cfb, - 0xe86b930c13282fa1, - 0xcc1ceb542ee5374b, - 0x538fd28aa21b3a08, - 0x1b61223ad89c0ac1, - 0x36c24474ad25149f, - 0x7a23d3e9f74c9d06, - 0xbe21f6e79968c5ed, - 0xcf5f868036278c77, - 0xf705d61beb5a9c30, - 0x4d2b47d152dce08d, - 0x5f9e7bfdc234ecf8, - 0x247778583dcd18ea, - 0x867ba67c4415d5aa, - 0x4ce1979d5a698999, - 0x0000000000000000, - 0xec64f42133c696f1, - 0xb57c5569c16b1171, - 0xc1c7926f467f88af, - 0x654d96fe0f3e2e97, - 0x15f936d5a8c40e19, - 0xb8a72c52a9f1ae95, - 0xa9517daa21db19dc, - 0x58d27104fa18ee94, - 0x5918a148f2ad8780, - 0x5cdd1629daf657c4, - 0x8274c15164fb6cfa, - 0xd1fb13dbc6e056f2, - 0x7d6fd910cf609f6a, - 0xb63f38bdd9a9aa4d, - 0x3d9fe7faf526c003, - 0x74bbc706871499de, - 0xdf630734b6b8522a, - 0x3ad3ed03cd0ac26f, - 0xfadeaf2083c023d4, - 0xc00d42234ecae1bb, - 0x8538cba85cd76e96, - 0xc402250e6e2458eb, - 0x47bc3413026a5d05, - 0xafd7a71f114272a4, - 0x978df784cc3f62e3, - 0xb96dfc1ea144c781, - 0x21b2cf391596c8ae, - 0x318e4e8d950916f3, - 0xce9556cc3e92e563, - 0x385a509bdd7d1047, - 0x358129a0b5e7afa3, - 0xe6f387e363702b79, - 0xe0755d5653e94001, - 0x7be903a5fff9f412, - 
0x12b53c2c90e80c75, - 0x3307f315857ec4db, - 0x8fafb86a0c61d31e, - 0xd9e5dd8186213952, - 0x77f8aad29fd622e2, - 0x25bda814357871fe, - 0x7571174a8fa1f0ca, - 0x137fec60985d6561, - 0x30449ec19dbc7fe7, - 0xa540d4dd41f4cf2c, - 0xdc206ae0ae7ae916, - 0x5b911cd0e2da55a8, - 0xb2305f90f947131d, - 0x344bf9ecbd52c6b7, - 0x5d17c665d2433ed0, - 0x18224feec05eb1fd, - 0x9e59e992844b6457, - 0x9a568ebfa4a5dd07, - 0xa3c60e68716da454, - 0x7e2cb4c4d7a22456, - 0x87b176304ca0bcbe, - 0x413aeea632f3367d, - 0x9915e36bbc67663b, - 0x40f03eea3a465f69, - 0x1c2d28c3e0b008ad, - 0x4e682a054a1e5bb1, - 0x05c5b761285bd044, - 0xe1bf8d1a5b5c2915, - 0xf2c0617ac3014c74, - 0xb7f5e8f1d11cc359, - 0x63cb4c4b3fa745ef, - 0x9d1a84469c89df6b, - 0xe33630824b2bfb3d, - 0xd5f474f6e60eefa2, - 0xf58c6b83fb2d4e18, - 0x4676e45f0adf3411, - 0x20781f751d23a1ba, - 0xbd629b3381aa7ed1, - 0xae1d775319f71bb0, - 0xfed1c80da32e9a84, - 0x5509083f92825170, - 0x29ac01635557a70e, - 0xa7c9694551831d04, - 0x8e65682604d4ba0a, - 0x11f651f8882ab749, - 0xd77dc96ef6793d8a, - 0xef2799f52b042dcd, - 0x48eef0b07a8730c9, - 0x22f1a2ed0d547392, - 0x6142f1d32fd097c7, - 0x4a674d286af0e2e1, - 0x80fd7cc9748cbed2, - 0x717e7067af4f499a, - 0x938290a9ecd1dbb3, - 0x88e3b293344dd172, - 0x2734158c250fa3d6, - }, - { - 0x7e37e62dfc7d40c3, - 0x776f25a4ee939e5b, - 0xe045c850dd8fb5ad, - 0x86ed5ba711ff1952, - 0xe91d0bd9cf616b35, - 0x37e0ab256e408ffb, - 0x9607f6c031025a7a, - 0x0b02f5e116d23c9d, - 0xf3d8486bfb50650c, - 0x621cff27c40875f5, - 0x7d40cb71fa5fd34a, - 0x6daa6616daa29062, - 0x9f5f354923ec84e2, - 0xec847c3dc507c3b3, - 0x025a3668043ce205, - 0xa8bf9e6c4dac0b19, - 0xfa808be2e9bebb94, - 0xb5b99c5277c74fa3, - 0x78d9bc95f0397bcc, - 0xe332e50cdbad2624, - 0xc74fce129332797e, - 0x1729eceb2ea709ab, - 0xc2d6b9f69954d1f8, - 0x5d898cbfbab8551a, - 0x859a76fb17dd8adb, - 0x1be85886362f7fb5, - 0xf6413f8ff136cd8a, - 0xd3110fa5bbb7e35c, - 0x0a2feed514cc4d11, - 0xe83010edcd7f1ab9, - 0xa1e75de55f42d581, - 0xeede4a55c13b21b6, - 0xf2f5535ff94e1480, - 0x0cc1b46d1888761e, - 
0xbce15fdb6529913b, - 0x2d25e8975a7181c2, - 0x71817f1ce2d7a554, - 0x2e52c5cb5c53124b, - 0xf9f7a6beef9c281d, - 0x9e722e7d21f2f56e, - 0xce170d9b81dca7e6, - 0x0e9b82051cb4941b, - 0x1e712f623c49d733, - 0x21e45cfa42f9f7dc, - 0xcb8e7a7f8bba0f60, - 0x8e98831a010fb646, - 0x474ccf0d8e895b23, - 0xa99285584fb27a95, - 0x8cc2b57205335443, - 0x42d5b8e984eff3a5, - 0x012d1b34021e718c, - 0x57a6626aae74180b, - 0xff19fc06e3d81312, - 0x35ba9d4d6a7c6dfe, - 0xc9d44c178f86ed65, - 0x506523e6a02e5288, - 0x03772d5c06229389, - 0x8b01f4fe0b691ec0, - 0xf8dabd8aed825991, - 0x4c4e3aec985b67be, - 0xb10df0827fbf96a9, - 0x6a69279ad4f8dae1, - 0xe78689dcd3d5ff2e, - 0x812e1a2b1fa553d1, - 0xfbad90d6eba0ca18, - 0x1ac543b234310e39, - 0x1604f7df2cb97827, - 0xa6241c6951189f02, - 0x753513cceaaf7c5e, - 0x64f2a59fc84c4efa, - 0x247d2b1e489f5f5a, - 0xdb64d718ab474c48, - 0x79f4a7a1f2270a40, - 0x1573da832a9bebae, - 0x3497867968621c72, - 0x514838d2a2302304, - 0xf0af6537fd72f685, - 0x1d06023e3a6b44ba, - 0x678588c3ce6edd73, - 0x66a893f7cc70acff, - 0xd4d24e29b5eda9df, - 0x3856321470ea6a6c, - 0x07c3418c0e5a4a83, - 0x2bcbb22f5635bacd, - 0x04b46cd00878d90a, - 0x06ee5ab80c443b0f, - 0x3b211f4876c8f9e5, - 0x0958c38912eede98, - 0xd14b39cdbf8b0159, - 0x397b292072f41be0, - 0x87c0409313e168de, - 0xad26e98847caa39f, - 0x4e140c849c6785bb, - 0xd5ff551db7f3d853, - 0xa0ca46d15d5ca40d, - 0xcd6020c787fe346f, - 0x84b76dcf15c3fb57, - 0xdefda0fca121e4ce, - 0x4b8d7b6096012d3d, - 0x9ac642ad298a2c64, - 0x0875d8bd10f0af14, - 0xb357c6ea7b8374ac, - 0x4d6321d89a451632, - 0xeda96709c719b23f, - 0xf76c24bbf328bc06, - 0xc662d526912c08f2, - 0x3ce25ec47892b366, - 0xb978283f6f4f39bd, - 0xc08c8f9e9d6833fd, - 0x4f3917b09e79f437, - 0x593de06fb2c08c10, - 0xd6887841b1d14bda, - 0x19b26eee32139db0, - 0xb494876675d93e2f, - 0x825937771987c058, - 0x90e9ac783d466175, - 0xf1827e03ff6c8709, - 0x945dc0a8353eb87f, - 0x4516f9658ab5b926, - 0x3f9573987eb020ef, - 0xb855330b6d514831, - 0x2ae6a91b542bcb41, - 0x6331e413c6160479, - 0x408f8e8180d311a0, - 0xeff35161c325503a, 
- 0xd06622f9bd9570d5, - 0x8876d9a20d4b8d49, - 0xa5533135573a0c8b, - 0xe168d364df91c421, - 0xf41b09e7f50a2f8f, - 0x12b09b0f24c1a12d, - 0xda49cc2ca9593dc4, - 0x1f5c34563e57a6bf, - 0x54d14f36a8568b82, - 0xaf7cdfe043f6419a, - 0xea6a2685c943f8bc, - 0xe5dcbfb4d7e91d2b, - 0xb27addde799d0520, - 0x6b443caed6e6ab6d, - 0x7bae91c9f61be845, - 0x3eb868ac7cae5163, - 0x11c7b65322e332a4, - 0xd23c1491b9a992d0, - 0x8fb5982e0311c7ca, - 0x70ac6428e0c9d4d8, - 0x895bc2960f55fcc5, - 0x76423e90ec8defd7, - 0x6ff0507ede9e7267, - 0x3dcf45f07a8cc2ea, - 0x4aa06054941f5cb1, - 0x5810fb5bb0defd9c, - 0x5efea1e3bc9ac693, - 0x6edd4b4adc8003eb, - 0x741808f8e8b10dd2, - 0x145ec1b728859a22, - 0x28bc9f7350172944, - 0x270a06424ebdccd3, - 0x972aedf4331c2bf6, - 0x059977e40a66a886, - 0x2550302a4a812ed6, - 0xdd8a8da0a7037747, - 0xc515f87a970e9b7b, - 0x3023eaa9601ac578, - 0xb7e3aa3a73fbada6, - 0x0fb699311eaae597, - 0x0000000000000000, - 0x310ef19d6204b4f4, - 0x229371a644db6455, - 0x0decaf591a960792, - 0x5ca4978bb8a62496, - 0x1c2b190a38753536, - 0x41a295b582cd602c, - 0x3279dcc16426277d, - 0xc1a194aa9f764271, - 0x139d803b26dfd0a1, - 0xae51c4d441e83016, - 0xd813fa44ad65dfc1, - 0xac0bf2bc45d4d213, - 0x23be6a9246c515d9, - 0x49d74d08923dcf38, - 0x9d05032127d066e7, - 0x2f7fdeff5e4d63c7, - 0xa47e2a0155247d07, - 0x99b16ff12fa8bfed, - 0x4661d4398c972aaf, - 0xdfd0bbc8a33f9542, - 0xdca79694a51d06cb, - 0xb020ebb67da1e725, - 0xba0f0563696daa34, - 0xe4f1a480d5f76ca7, - 0xc438e34e9510eaf7, - 0x939e81243b64f2fc, - 0x8defae46072d25cf, - 0x2c08f3a3586ff04e, - 0xd7a56375b3cf3a56, - 0x20c947ce40e78650, - 0x43f8a3dd86f18229, - 0x568b795eac6a6987, - 0x8003011f1dbb225d, - 0xf53612d3f7145e03, - 0x189f75da300dec3c, - 0x9570db9c3720c9f3, - 0xbb221e576b73dbb8, - 0x72f65240e4f536dd, - 0x443be25188abc8aa, - 0xe21ffe38d9b357a8, - 0xfd43ca6ee7e4f117, - 0xcaa3614b89a47eec, - 0xfe34e732e1c6629e, - 0x83742c431b99b1d4, - 0xcf3a16af83c2d66a, - 0xaae5a8044990e91c, - 0x26271d764ca3bd5f, - 0x91c4b74c3f5810f9, - 0x7c6dd045f841a2c6, - 
0x7f1afd19fe63314f, - 0xc8f957238d989ce9, - 0xa709075d5306ee8e, - 0x55fc5402aa48fa0e, - 0x48fa563c9023beb4, - 0x65dfbeabca523f76, - 0x6c877d22d8bce1ee, - 0xcc4d3bf385e045e3, - 0xbebb69b36115733e, - 0x10eaad6720fd4328, - 0xb6ceb10e71e5dc2a, - 0xbdcc44ef6737e0b7, - 0x523f158ea412b08d, - 0x989c74c52db6ce61, - 0x9beb59992b945de8, - 0x8a2cefca09776f4c, - 0xa3bd6b8d5b7e3784, - 0xeb473db1cb5d8930, - 0xc3fba2c29b4aa074, - 0x9c28181525ce176b, - 0x683311f2d0c438e4, - 0x5fd3bad7be84b71f, - 0xfc6ed15ae5fa809b, - 0x36cdb0116c5efe77, - 0x29918447520958c8, - 0xa29070b959604608, - 0x53120ebaa60cc101, - 0x3a0c047c74d68869, - 0x691e0ac6d2da4968, - 0x73db4974e6eb4751, - 0x7a838afdf40599c9, - 0x5a4acd33b4e21f99, - 0x6046c94fc03497f0, - 0xe6ab92e8d1cb8ea2, - 0x3354c7f5663856f1, - 0xd93ee170af7bae4d, - 0x616bd27bc22ae67c, - 0x92b39a10397a8370, - 0xabc8b3304b8e9890, - 0xbf967287630b02b2, - 0x5b67d607b6fc6e15, - }, - { - 0x8ab0a96846e06a6d, - 0x43c7e80b4bf0b33a, - 0x08c9b3546b161ee5, - 0x39f1c235eba990be, - 0xc1bef2376606c7b2, - 0x2c209233614569aa, - 0xeb01523b6fc3289a, - 0x946953ab935acedd, - 0x272838f63e13340e, - 0x8b0455eca12ba052, - 0x77a1b2c4978ff8a2, - 0xa55122ca13e54086, - 0x2276135862d3f1cd, - 0xdb8ddfde08b76cfe, - 0x5d1e12c89e4a178a, - 0x0e56816b03969867, - 0xee5f79953303ed59, - 0xafed748bab78d71d, - 0x6d929f2df93e53ee, - 0xf5d8a8f8ba798c2a, - 0xf619b1698e39cf6b, - 0x95ddaf2f749104e2, - 0xec2a9c80e0886427, - 0xce5c8fd8825b95ea, - 0xc4e0d9993ac60271, - 0x4699c3a5173076f9, - 0x3d1b151f50a29f42, - 0x9ed505ea2bc75946, - 0x34665acfdc7f4b98, - 0x61b1fb53292342f7, - 0xc721c0080e864130, - 0x8693cd1696fd7b74, - 0x872731927136b14b, - 0xd3446c8a63a1721b, - 0x669a35e8a6680e4a, - 0xcab658f239509a16, - 0xa4e5de4ef42e8ab9, - 0x37a7435ee83f08d9, - 0x134e6239e26c7f96, - 0x82791a3c2df67488, - 0x3f6ef00a8329163c, - 0x8e5a7e42fdeb6591, - 0x5caaee4c7981ddb5, - 0x19f234785af1e80d, - 0x255ddde3ed98bd70, - 0x50898a32a99cccac, - 0x28ca4519da4e6656, - 0xae59880f4cb31d22, - 0x0d9798fa37d6db26, - 
0x32f968f0b4ffcd1a, - 0xa00f09644f258545, - 0xfa3ad5175e24de72, - 0xf46c547c5db24615, - 0x713e80fbff0f7e20, - 0x7843cf2b73d2aafa, - 0xbd17ea36aedf62b4, - 0xfd111bacd16f92cf, - 0x4abaa7dbc72d67e0, - 0xb3416b5dad49fad3, - 0xbca316b24914a88b, - 0x15d150068aecf914, - 0xe27c1debe31efc40, - 0x4fe48c759beda223, - 0x7edcfd141b522c78, - 0x4e5070f17c26681c, - 0xe696cac15815f3bc, - 0x35d2a64b3bb481a7, - 0x800cff29fe7dfdf6, - 0x1ed9fac3d5baa4b0, - 0x6c2663a91ef599d1, - 0x03c1199134404341, - 0xf7ad4ded69f20554, - 0xcd9d9649b61bd6ab, - 0xc8c3bde7eadb1368, - 0xd131899fb02afb65, - 0x1d18e352e1fae7f1, - 0xda39235aef7ca6c1, - 0xa1bbf5e0a8ee4f7a, - 0x91377805cf9a0b1e, - 0x3138716180bf8e5b, - 0xd9f83acbdb3ce580, - 0x0275e515d38b897e, - 0x472d3f21f0fbbcc6, - 0x2d946eb7868ea395, - 0xba3c248d21942e09, - 0xe7223645bfde3983, - 0xff64feb902e41bb1, - 0xc97741630d10d957, - 0xc3cb1722b58d4ecc, - 0xa27aec719cae0c3b, - 0x99fecb51a48c15fb, - 0x1465ac826d27332b, - 0xe1bd047ad75ebf01, - 0x79f733af941960c5, - 0x672ec96c41a3c475, - 0xc27feba6524684f3, - 0x64efd0fd75e38734, - 0xed9e60040743ae18, - 0xfb8e2993b9ef144d, - 0x38453eb10c625a81, - 0x6978480742355c12, - 0x48cf42ce14a6ee9e, - 0x1cac1fd606312dce, - 0x7b82d6ba4792e9bb, - 0x9d141c7b1f871a07, - 0x5616b80dc11c4a2e, - 0xb849c198f21fa777, - 0x7ca91801c8d9a506, - 0xb1348e487ec273ad, - 0x41b20d1e987b3a44, - 0x7460ab55a3cfbbe3, - 0x84e628034576f20a, - 0x1b87d16d897a6173, - 0x0fe27defe45d5258, - 0x83cde6b8ca3dbeb7, - 0x0c23647ed01d1119, - 0x7a362a3ea0592384, - 0xb61f40f3f1893f10, - 0x75d457d1440471dc, - 0x4558da34237035b8, - 0xdca6116587fc2043, - 0x8d9b67d3c9ab26d0, - 0x2b0b5c88ee0e2517, - 0x6fe77a382ab5da90, - 0x269cc472d9d8fe31, - 0x63c41e46faa8cb89, - 0xb7abbc771642f52f, - 0x7d1de4852f126f39, - 0xa8c6ba3024339ba0, - 0x600507d7cee888c8, - 0x8fee82c61a20afae, - 0x57a2448926d78011, - 0xfca5e72836a458f0, - 0x072bcebb8f4b4cbd, - 0x497bbe4af36d24a1, - 0x3cafe99bb769557d, - 0x12fa9ebd05a7b5a9, - 0xe8c04baa5b836bdb, - 0x4273148fac3b7905, - 0x908384812851c121, 
- 0xe557d3506c55b0fd, - 0x72ff996acb4f3d61, - 0x3eda0c8e64e2dc03, - 0xf0868356e6b949e9, - 0x04ead72abb0b0ffc, - 0x17a4b5135967706a, - 0xe3c8e16f04d5367f, - 0xf84f30028daf570c, - 0x1846c8fcbd3a2232, - 0x5b8120f7f6ca9108, - 0xd46fa231ecea3ea6, - 0x334d947453340725, - 0x58403966c28ad249, - 0xbed6f3a79a9f21f5, - 0x68ccb483a5fe962d, - 0xd085751b57e1315a, - 0xfed0023de52fd18e, - 0x4b0e5b5f20e6addf, - 0x1a332de96eb1ab4c, - 0xa3ce10f57b65c604, - 0x108f7ba8d62c3cd7, - 0xab07a3a11073d8e1, - 0x6b0dad1291bed56c, - 0xf2f366433532c097, - 0x2e557726b2cee0d4, - 0x0000000000000000, - 0xcb02a476de9b5029, - 0xe4e32fd48b9e7ac2, - 0x734b65ee2c84f75e, - 0x6e5386bccd7e10af, - 0x01b4fc84e7cbca3f, - 0xcfe8735c65905fd5, - 0x3613bfda0ff4c2e6, - 0x113b872c31e7f6e8, - 0x2fe18ba255052aeb, - 0xe974b72ebc48a1e4, - 0x0abc5641b89d979b, - 0xb46aa5e62202b66e, - 0x44ec26b0c4bbff87, - 0xa6903b5b27a503c7, - 0x7f680190fc99e647, - 0x97a84a3aa71a8d9c, - 0xdd12ede16037ea7c, - 0xc554251ddd0dc84e, - 0x88c54c7d956be313, - 0x4d91696048662b5d, - 0xb08072cc9909b992, - 0xb5de5962c5c97c51, - 0x81b803ad19b637c9, - 0xb2f597d94a8230ec, - 0x0b08aac55f565da4, - 0xf1327fd2017283d6, - 0xad98919e78f35e63, - 0x6ab9519676751f53, - 0x24e921670a53774f, - 0xb9fd3d1c15d46d48, - 0x92f66194fbda485f, - 0x5a35dc7311015b37, - 0xded3f4705477a93d, - 0xc00a0eb381cd0d8d, - 0xbb88d809c65fe436, - 0x16104997beacba55, - 0x21b70ac95693b28c, - 0x59f4c5e225411876, - 0xd5db5eb50b21f499, - 0x55d7a19cf55c096f, - 0xa97246b4c3f8519f, - 0x8552d487a2bd3835, - 0x54635d181297c350, - 0x23c2efdc85183bf2, - 0x9f61f96ecc0c9379, - 0x534893a39ddc8fed, - 0x5edf0b59aa0a54cb, - 0xac2c6d1a9f38945c, - 0xd7aebba0d8aa7de7, - 0x2abfa00c09c5ef28, - 0xd84cc64f3cf72fbf, - 0x2003f64db15878b3, - 0xa724c7dfc06ec9f8, - 0x069f323f68808682, - 0xcc296acd51d01c94, - 0x055e2bae5cc0c5c3, - 0x6270e2c21d6301b6, - 0x3b842720382219c0, - 0xd2f0900e846ab824, - 0x52fc6f277a1745d2, - 0xc6953c8ce94d8b0f, - 0xe009f8fe3095753e, - 0x655b2c7992284d0b, - 0x984a37d54347dfc4, - 
0xeab5aebf8808e2a5, - 0x9a3fd2c090cc56ba, - 0x9ca0e0fff84cd038, - 0x4c2595e4afade162, - 0xdf6708f4b3bc6302, - 0xbf620f237d54ebca, - 0x93429d101c118260, - 0x097d4fd08cddd4da, - 0x8c2f9b572e60ecef, - 0x708a7c7f18c4b41f, - 0x3a30dba4dfe9d3ff, - 0x4006f19a7fb0f07b, - 0x5f6bf7dd4dc19ef4, - 0x1f6d064732716e8f, - 0xf9fbcc866a649d33, - 0x308c8de567744464, - 0x8971b0f972a0292c, - 0xd61a47243f61b7d8, - 0xefeb8511d4c82766, - 0x961cb6be40d147a3, - 0xaab35f25f7b812de, - 0x76154e407044329d, - 0x513d76b64e570693, - 0xf3479ac7d2f90aa8, - 0x9b8b2e4477079c85, - 0x297eb99d3d85ac69, - }, - { - 0x3ef29d249b2c0a19, - 0xe9e16322b6f8622f, - 0x5536994047757f7a, - 0x9f4d56d5a47b0b33, - 0x822567466aa1174c, - 0xb8f5057deb082fb2, - 0xcc48c10bf4475f53, - 0x373088d4275dec3a, - 0x968f4325180aed10, - 0x173d232cf7016151, - 0xae4ed09f946fcc13, - 0xfd4b4741c4539873, - 0x1b5b3f0dd9933765, - 0x2ffcb0967b644052, - 0xe02376d20a89840c, - 0xa3ae3a70329b18d7, - 0x419cbd2335de8526, - 0xfafebf115b7c3199, - 0x0397074f85aa9b0d, - 0xc58ad4fb4836b970, - 0xbec60be3fc4104a8, - 0x1eff36dc4b708772, - 0x131fdc33ed8453b6, - 0x0844e33e341764d3, - 0x0ff11b6eab38cd39, - 0x64351f0a7761b85a, - 0x3b5694f509cfba0e, - 0x30857084b87245d0, - 0x47afb3bd2297ae3c, - 0xf2ba5c2f6f6b554a, - 0x74bdc4761f4f70e1, - 0xcfdfc64471edc45e, - 0xe610784c1dc0af16, - 0x7aca29d63c113f28, - 0x2ded411776a859af, - 0xac5f211e99a3d5ee, - 0xd484f949a87ef33b, - 0x3ce36ca596e013e4, - 0xd120f0983a9d432c, - 0x6bc40464dc597563, - 0x69d5f5e5d1956c9e, - 0x9ae95f043698bb24, - 0xc9ecc8da66a4ef44, - 0xd69508c8a5b2eac6, - 0xc40c2235c0503b80, - 0x38c193ba8c652103, - 0x1ceec75d46bc9e8f, - 0xd331011937515ad1, - 0xd8e2e56886eca50f, - 0xb137108d5779c991, - 0x709f3b6905ca4206, - 0x4feb50831680caef, - 0xec456af3241bd238, - 0x58d673afe181abbe, - 0x242f54e7cad9bf8c, - 0x0211f1810dcc19fd, - 0x90bc4dbb0f43c60a, - 0x9518446a9da0761d, - 0xa1bfcbf13f57012a, - 0x2bde4f8961e172b5, - 0x27b853a84f732481, - 0xb0b1e643df1f4b61, - 0x18cc38425c39ac68, - 0xd2b7f7d7bf37d821, - 
0x3103864a3014c720, - 0x14aa246372abfa5c, - 0x6e600db54ebac574, - 0x394765740403a3f3, - 0x09c215f0bc71e623, - 0x2a58b947e987f045, - 0x7b4cdf18b477bdd8, - 0x9709b5eb906c6fe0, - 0x73083c268060d90b, - 0xfedc400e41f9037e, - 0x284948c6e44be9b8, - 0x728ecae808065bfb, - 0x06330e9e17492b1a, - 0x5950856169e7294e, - 0xbae4f4fce6c4364f, - 0xca7bcf95e30e7449, - 0x7d7fd186a33e96c2, - 0x52836110d85ad690, - 0x4dfaa1021b4cd312, - 0x913abb75872544fa, - 0xdd46ecb9140f1518, - 0x3d659a6b1e869114, - 0xc23f2cabd719109a, - 0xd713fe062dd46836, - 0xd0a60656b2fbc1dc, - 0x221c5a79dd909496, - 0xefd26dbca1b14935, - 0x0e77eda0235e4fc9, - 0xcbfd395b6b68f6b9, - 0x0de0eaefa6f4d4c4, - 0x0422ff1f1a8532e7, - 0xf969b85eded6aa94, - 0x7f6e2007aef28f3f, - 0x3ad0623b81a938fe, - 0x6624ee8b7aada1a7, - 0xb682e8ddc856607b, - 0xa78cc56f281e2a30, - 0xc79b257a45faa08d, - 0x5b4174e0642b30b3, - 0x5f638bff7eae0254, - 0x4bc9af9c0c05f808, - 0xce59308af98b46ae, - 0x8fc58da9cc55c388, - 0x803496c7676d0eb1, - 0xf33caae1e70dd7ba, - 0xbb6202326ea2b4bf, - 0xd5020f87201871cb, - 0x9d5ca754a9b712ce, - 0x841669d87de83c56, - 0x8a6184785eb6739f, - 0x420bba6cb0741e2b, - 0xf12d5b60eac1ce47, - 0x76ac35f71283691c, - 0x2c6bb7d9fecedb5f, - 0xfccdb18f4c351a83, - 0x1f79c012c3160582, - 0xf0abadae62a74cb7, - 0xe1a5801c82ef06fc, - 0x67a21845f2cb2357, - 0x5114665f5df04d9d, - 0xbf40fd2d74278658, - 0xa0393d3fb73183da, - 0x05a409d192e3b017, - 0xa9fb28cf0b4065f9, - 0x25a9a22942bf3d7c, - 0xdb75e22703463e02, - 0xb326e10c5ab5d06c, - 0xe7968e8295a62de6, - 0xb973f3b3636ead42, - 0xdf571d3819c30ce5, - 0xee549b7229d7cbc5, - 0x12992afd65e2d146, - 0xf8ef4e9056b02864, - 0xb7041e134030e28b, - 0xc02edd2adad50967, - 0x932b4af48ae95d07, - 0x6fe6fb7bc6dc4784, - 0x239aacb755f61666, - 0x401a4bedbdb807d6, - 0x485ea8d389af6305, - 0xa41bc220adb4b13d, - 0x753b32b89729f211, - 0x997e584bb3322029, - 0x1d683193ceda1c7f, - 0xff5ab6c0c99f818e, - 0x16bbd5e27f67e3a1, - 0xa59d34ee25d233cd, - 0x98f8ae853b54a2d9, - 0x6df70afacb105e79, - 0x795d2e99b9bba425, - 0x8e437b6744334178, 
- 0x0186f6ce886682f0, - 0xebf092a3bb347bd2, - 0xbcd7fa62f18d1d55, - 0xadd9d7d011c5571e, - 0x0bd3e471b1bdffde, - 0xaa6c2f808eeafef4, - 0x5ee57d31f6c880a4, - 0xf50fa47ff044fca0, - 0x1addc9c351f5b595, - 0xea76646d3352f922, - 0x0000000000000000, - 0x85909f16f58ebea6, - 0x46294573aaf12ccc, - 0x0a5512bf39db7d2e, - 0x78dbd85731dd26d5, - 0x29cfbe086c2d6b48, - 0x218b5d36583a0f9b, - 0x152cd2adfacd78ac, - 0x83a39188e2c795bc, - 0xc3b9da655f7f926a, - 0x9ecba01b2c1d89c3, - 0x07b5f8509f2fa9ea, - 0x7ee8d6c926940dcf, - 0x36b67e1aaf3b6eca, - 0x86079859702425ab, - 0xfb7849dfd31ab369, - 0x4c7c57cc932a51e2, - 0xd96413a60e8a27ff, - 0x263ea566c715a671, - 0x6c71fc344376dc89, - 0x4a4f595284637af8, - 0xdaf314e98b20bcf2, - 0x572768c14ab96687, - 0x1088db7c682ec8bb, - 0x887075f9537a6a62, - 0x2e7a4658f302c2a2, - 0x619116dbe582084d, - 0xa87dde018326e709, - 0xdcc01a779c6997e8, - 0xedc39c3dac7d50c8, - 0xa60a33a1a078a8c0, - 0xc1a82be452b38b97, - 0x3f746bea134a88e9, - 0xa228ccbebafd9a27, - 0xabead94e068c7c04, - 0xf48952b178227e50, - 0x5cf48cb0fb049959, - 0x6017e0156de48abd, - 0x4438b4f2a73d3531, - 0x8c528ae649ff5885, - 0xb515ef924dfcfb76, - 0x0c661c212e925634, - 0xb493195cc59a7986, - 0x9cda519a21d1903e, - 0x32948105b5be5c2d, - 0x194ace8cd45f2e98, - 0x438d4ca238129cdb, - 0x9b6fa9cabefe39d4, - 0x81b26009ef0b8c41, - 0xded1ebf691a58e15, - 0x4e6da64d9ee6481f, - 0x54b06f8ecf13fd8a, - 0x49d85e1d01c9e1f5, - 0xafc826511c094ee3, - 0xf698a33075ee67ad, - 0x5ac7822eec4db243, - 0x8dd47c28c199da75, - 0x89f68337db1ce892, - 0xcdce37c57c21dda3, - 0x530597de503c5460, - 0x6a42f2aa543ff793, - 0x5d727a7e73621ba9, - 0xe232875307459df1, - 0x56a19e0fc2dfe477, - 0xc61dd3b4cd9c227d, - 0xe5877f03986a341b, - 0x949eb2a415c6f4ed, - 0x6206119460289340, - 0x6380e75ae84e11b0, - 0x8be772b6d6d0f16f, - 0x50929091d596cf6d, - 0xe86795ec3e9ee0df, - 0x7cf927482b581432, - 0xc86a3e14eec26db4, - 0x7119cda78dacc0f6, - 0xe40189cd100cb6eb, - 0x92adbc3a028fdff7, - 0xb2a017c2d2d3529c, - 0x200dabf8d05c8d6b, - 0x34a78f9ba2f77737, - 
0xe3b4719d8f231f01, - 0x45be423c2f5bb7c1, - 0xf71e55fefd88e55d, - 0x6853032b59f3ee6e, - 0x65b3e9c4ff073aaa, - 0x772ac3399ae5ebec, - 0x87816e97f842a75b, - 0x110e2db2e0484a4b, - 0x331277cb3dd8dedd, - 0xbd510cac79eb9fa5, - 0x352179552a91f5c7, - }, - { - 0x05ba7bc82c9b3220, - 0x31a54665f8b65e4f, - 0xb1b651f77547f4d4, - 0x8bfa0d857ba46682, - 0x85a96c5aa16a98bb, - 0x990faef908eb79c9, - 0xa15e37a247f4a62d, - 0x76857dcd5d27741e, - 0xf8c50b800a1820bc, - 0xbe65dcb201f7a2b4, - 0x666d1b986f9426e7, - 0x4cc921bf53c4e648, - 0x95410a0f93d9ca42, - 0x20cdccaa647ba4ef, - 0x429a4060890a1871, - 0x0c4ea4f69b32b38b, - 0xccda362dde354cd3, - 0x96dc23bc7c5b2fa9, - 0xc309bb68aa851ab3, - 0xd26131a73648e013, - 0x021dc52941fc4db2, - 0xcd5adab7704be48a, - 0xa77965d984ed71e6, - 0x32386fd61734bba4, - 0xe82d6dd538ab7245, - 0x5c2147ea6177b4b1, - 0x5da1ab70cf091ce8, - 0xac907fce72b8bdff, - 0x57c85dfd972278a8, - 0xa4e44c6a6b6f940d, - 0x3851995b4f1fdfe4, - 0x62578ccaed71bc9e, - 0xd9882bb0c01d2c0a, - 0x917b9d5d113c503b, - 0xa2c31e11a87643c6, - 0xe463c923a399c1ce, - 0xf71686c57ea876dc, - 0x87b4a973e096d509, - 0xaf0d567d9d3a5814, - 0xb40c2a3f59dcc6f4, - 0x3602f88495d121dd, - 0xd3e1dd3d9836484a, - 0xf945e71aa46688e5, - 0x7518547eb2a591f5, - 0x9366587450c01d89, - 0x9ea81018658c065b, - 0x4f54080cbc4603a3, - 0x2d0384c65137bf3d, - 0xdc325078ec861e2a, - 0xea30a8fc79573ff7, - 0x214d2030ca050cb6, - 0x65f0322b8016c30c, - 0x69be96dd1b247087, - 0xdb95ee9981e161b8, - 0xd1fc1814d9ca05f8, - 0x820ed2bbcc0de729, - 0x63d76050430f14c7, - 0x3bccb0e8a09d3a0f, - 0x8e40764d573f54a2, - 0x39d175c1e16177bd, - 0x12f5a37c734f1f4b, - 0xab37c12f1fdfc26d, - 0x5648b167395cd0f1, - 0x6c04ed1537bf42a7, - 0xed97161d14304065, - 0x7d6c67daab72b807, - 0xec17fa87ba4ee83c, - 0xdfaf79cb0304fbc1, - 0x733f060571bc463e, - 0x78d61c1287e98a27, - 0xd07cf48e77b4ada1, - 0xb9c262536c90dd26, - 0xe2449b5860801605, - 0x8fc09ad7f941fcfb, - 0xfad8cea94be46d0e, - 0xa343f28b0608eb9f, - 0x9b126bd04917347b, - 0x9a92874ae7699c22, - 0x1b017c42c4e69ee0, - 
0x3a4c5c720ee39256, - 0x4b6e9f5e3ea399da, - 0x6ba353f45ad83d35, - 0xe7fee0904c1b2425, - 0x22d009832587e95d, - 0x842980c00f1430e2, - 0xc6b3c0a0861e2893, - 0x087433a419d729f2, - 0x341f3dadd42d6c6f, - 0xee0a3faefbb2a58e, - 0x4aee73c490dd3183, - 0xaab72db5b1a16a34, - 0xa92a04065e238fdf, - 0x7b4b35a1686b6fcc, - 0x6a23bf6ef4a6956c, - 0x191cb96b851ad352, - 0x55d598d4d6de351a, - 0xc9604de5f2ae7ef3, - 0x1ca6c2a3a981e172, - 0xde2f9551ad7a5398, - 0x3025aaff56c8f616, - 0x15521d9d1e2860d9, - 0x506fe31cfa45073a, - 0x189c55f12b647b0b, - 0x0180ec9aae7ea859, - 0x7cec8b40050c105e, - 0x2350e5198bf94104, - 0xef8ad33455cc0dd7, - 0x07a7bee16d677f92, - 0xe5e325b90de76997, - 0x5a061591a26e637a, - 0xb611ef1618208b46, - 0x09f4df3eb7a981ab, - 0x1ebb078ae87dacc0, - 0xb791038cb65e231f, - 0x0fd38d4574b05660, - 0x67edf702c1ea8ebe, - 0xba5f4be0831238cd, - 0xe3c477c2cefebe5c, - 0x0dce486c354c1bd2, - 0x8c5db36416c31910, - 0x26ea9ed1a7627324, - 0x039d29b3ef82e5eb, - 0x9f28fc82cbf2ae02, - 0xa8aae89cf05d2786, - 0x431aacfa2774b028, - 0xcf471f9e31b7a938, - 0x581bd0b8e3922ec8, - 0xbc78199b400bef06, - 0x90fb71c7bf42f862, - 0x1f3beb1046030499, - 0x683e7a47b55ad8de, - 0x988f4263a695d190, - 0xd808c72a6e638453, - 0x0627527bc319d7cb, - 0xebb04466d72997ae, - 0xe67e0c0ae2658c7c, - 0x14d2f107b056c880, - 0x7122c32c30400b8c, - 0x8a7ae11fd5dacedb, - 0xa0dedb38e98a0e74, - 0xad109354dcc615a6, - 0x0be91a17f655cc19, - 0x8ddd5ffeb8bdb149, - 0xbfe53028af890aed, - 0xd65ba6f5b4ad7a6a, - 0x7956f0882997227e, - 0x10e8665532b352f9, - 0x0e5361dfdacefe39, - 0xcec7f3049fc90161, - 0xff62b561677f5f2e, - 0x975ccf26d22587f0, - 0x51ef0f86543baf63, - 0x2f1e41ef10cbf28f, - 0x52722635bbb94a88, - 0xae8dbae73344f04d, - 0x410769d36688fd9a, - 0xb3ab94de34bbb966, - 0x801317928df1aa9b, - 0xa564a0f0c5113c54, - 0xf131d4bebdb1a117, - 0x7f71a2f3ea8ef5b5, - 0x40878549c8f655c3, - 0x7ef14e6944f05dec, - 0xd44663dcf55137d8, - 0xf2acfd0d523344fc, - 0x0000000000000000, - 0x5fbc6e598ef5515a, - 0x16cf342ef1aa8532, - 0xb036bd6ddb395c8d, - 0x13754fe6dd31b712, 
- 0xbbdfa77a2d6c9094, - 0x89e7c8ac3a582b30, - 0x3c6b0e09cdfa459d, - 0xc4ae0589c7e26521, - 0x49735a777f5fd468, - 0xcafd64561d2c9b18, - 0xda1502032f9fc9e1, - 0x8867243694268369, - 0x3782141e3baf8984, - 0x9cb5d53124704be9, - 0xd7db4a6f1ad3d233, - 0xa6f989432a93d9bf, - 0x9d3539ab8a0ee3b0, - 0x53f2caaf15c7e2d1, - 0x6e19283c76430f15, - 0x3debe2936384edc4, - 0x5e3c82c3208bf903, - 0x33b8834cb94a13fd, - 0x6470deb12e686b55, - 0x359fd1377a53c436, - 0x61caa57902f35975, - 0x043a975282e59a79, - 0xfd7f70482683129c, - 0xc52ee913699ccd78, - 0x28b9ff0e7dac8d1d, - 0x5455744e78a09d43, - 0xcb7d88ccb3523341, - 0x44bd121b4a13cfba, - 0x4d49cd25fdba4e11, - 0x3e76cb208c06082f, - 0x3ff627ba2278a076, - 0xc28957f204fbb2ea, - 0x453dfe81e46d67e3, - 0x94c1e6953da7621b, - 0x2c83685cff491764, - 0xf32c1197fc4deca5, - 0x2b24d6bd922e68f6, - 0xb22b78449ac5113f, - 0x48f3b6edd1217c31, - 0x2e9ead75beb55ad6, - 0x174fd8b45fd42d6b, - 0x4ed4e4961238abfa, - 0x92e6b4eefebeb5d0, - 0x46a0d7320bef8208, - 0x47203ba8a5912a51, - 0x24f75bf8e69e3e96, - 0xf0b1382413cf094e, - 0xfee259fbc901f777, - 0x276a724b091cdb7d, - 0xbdf8f501ee75475f, - 0x599b3c224dec8691, - 0x6d84018f99c1eafe, - 0x7498b8e41cdb39ac, - 0xe0595e71217c5bb7, - 0x2aa43a273c50c0af, - 0xf50b43ec3f543b6e, - 0x838e3e2162734f70, - 0xc09492db4507ff58, - 0x72bfea9fdfc2ee67, - 0x11688acf9ccdfaa0, - 0x1a8190d86a9836b9, - 0x7acbd93bc615c795, - 0xc7332c3a286080ca, - 0x863445e94ee87d50, - 0xf6966a5fd0d6de85, - 0xe9ad814f96d5da1c, - 0x70a22fb69e3ea3d5, - 0x0a69f68d582b6440, - 0xb8428ec9c2ee757f, - 0x604a49e3ac8df12c, - 0x5b86f90b0c10cb23, - 0xe1d9b2eb8f02f3ee, - 0x29391394d3d22544, - 0xc8e0a17f5cd0d6aa, - 0xb58cc6a5f7a26ead, - 0x8193fb08238f02c2, - 0xd5c68f465b2f9f81, - 0xfcff9cd288fdbac5, - 0x77059157f359dc47, - 0x1d262e3907ff492b, - 0xfb582233e59ac557, - 0xddb2bce242f8b673, - 0x2577b76248e096cf, - 0x6f99c4a6d83da74c, - 0xc1147e41eb795701, - 0xf48baf76912a9337, - }, - { - 0x45b268a93acde4cc, - 0xaf7f0be884549d08, - 0x048354b3c1468263, - 0x925435c2c80efed2, - 
0xee4e37f27fdffba7, - 0x167a33920c60f14d, - 0xfb123b52ea03e584, - 0x4a0cab53fdbb9007, - 0x9deaf6380f788a19, - 0xcb48ec558f0cb32a, - 0xb59dc4b2d6fef7e0, - 0xdcdbca22f4f3ecb6, - 0x11df5813549a9c40, - 0xe33fdedf568aced3, - 0xa0c1c8124322e9c3, - 0x07a56b8158fa6d0d, - 0x77279579b1e1f3dd, - 0xd9b18b74422ac004, - 0xb8ec2d9fffabc294, - 0xf4acf8a82d75914f, - 0x7bbf69b1ef2b6878, - 0xc4f62faf487ac7e1, - 0x76ce809cc67e5d0c, - 0x6711d88f92e4c14c, - 0x627b99d9243dedfe, - 0x234aa5c3dfb68b51, - 0x909b1f15262dbf6d, - 0x4f66ea054b62bcb5, - 0x1ae2cf5a52aa6ae8, - 0xbea053fbd0ce0148, - 0xed6808c0e66314c9, - 0x43fe16cd15a82710, - 0xcd049231a06970f6, - 0xe7bc8a6c97cc4cb0, - 0x337ce835fcb3b9c0, - 0x65def2587cc780f3, - 0x52214ede4132bb50, - 0x95f15e4390f493df, - 0x870839625dd2e0f1, - 0x41313c1afb8b66af, - 0x91720af051b211bc, - 0x477d427ed4eea573, - 0x2e3b4ceef6e3be25, - 0x82627834eb0bcc43, - 0x9c03e3dd78e724c8, - 0x2877328ad9867df9, - 0x14b51945e243b0f2, - 0x574b0f88f7eb97e2, - 0x88b6fa989aa4943a, - 0x19c4f068cb168586, - 0x50ee6409af11faef, - 0x7df317d5c04eaba4, - 0x7a567c5498b4c6a9, - 0xb6bbfb804f42188e, - 0x3cc22bcf3bc5cd0b, - 0xd04336eaaa397713, - 0xf02fac1bec33132c, - 0x2506dba7f0d3488d, - 0xd7e65d6bf2c31a1e, - 0x5eb9b2161ff820f5, - 0x842e0650c46e0f9f, - 0x716beb1d9e843001, - 0xa933758cab315ed4, - 0x3fe414fda2792265, - 0x27c9f1701ef00932, - 0x73a4c1ca70a771be, - 0x94184ba6e76b3d0e, - 0x40d829ff8c14c87e, - 0x0fbec3fac77674cb, - 0x3616a9634a6a9572, - 0x8f139119c25ef937, - 0xf545ed4d5aea3f9e, - 0xe802499650ba387b, - 0x6437e7bd0b582e22, - 0xe6559f89e053e261, - 0x80ad52e305288dfc, - 0x6dc55a23e34b9935, - 0xde14e0f51ad0ad09, - 0xc6390578a659865e, - 0x96d7617109487cb1, - 0xe2d6cb3a21156002, - 0x01e915e5779faed1, - 0xadb0213f6a77dcb7, - 0x9880b76eb9a1a6ab, - 0x5d9f8d248644cf9b, - 0xfd5e4536c5662658, - 0xf1c6b9fe9bacbdfd, - 0xeacd6341be9979c4, - 0xefa7221708405576, - 0x510771ecd88e543e, - 0xc2ba51cb671f043d, - 0x0ad482ac71af5879, - 0xfe787a045cdac936, - 0xb238af338e049aed, - 0xbd866cc94972ee26, 
- 0x615da6ebbd810290, - 0x3295fdd08b2c1711, - 0xf834046073bf0aea, - 0xf3099329758ffc42, - 0x1caeb13e7dcfa934, - 0xba2307481188832b, - 0x24efce42874ce65c, - 0x0e57d61fb0e9da1a, - 0xb3d1bad6f99b343c, - 0xc0757b1c893c4582, - 0x2b510db8403a9297, - 0x5c7698c1f1db614a, - 0x3e0d0118d5e68cb4, - 0xd60f488e855cb4cf, - 0xae961e0df3cb33d9, - 0x3a8e55ab14a00ed7, - 0x42170328623789c1, - 0x838b6dd19c946292, - 0x895fef7ded3b3aeb, - 0xcfcbb8e64e4a3149, - 0x064c7e642f65c3dc, - 0x3d2b3e2a4c5a63da, - 0x5bd3f340a9210c47, - 0xb474d157a1615931, - 0xac5934da1de87266, - 0x6ee365117af7765b, - 0xc86ed36716b05c44, - 0x9ba6885c201d49c5, - 0xb905387a88346c45, - 0x131072c4bab9ddff, - 0xbf49461ea751af99, - 0xd52977bc1ce05ba1, - 0xb0f785e46027db52, - 0x546d30ba6e57788c, - 0x305ad707650f56ae, - 0xc987c682612ff295, - 0xa5ab8944f5fbc571, - 0x7ed528e759f244ca, - 0x8ddcbbce2c7db888, - 0xaa154abe328db1ba, - 0x1e619be993ece88b, - 0x09f2bd9ee813b717, - 0x7401aa4b285d1cb3, - 0x21858f143195caee, - 0x48c381841398d1b8, - 0xfcb750d3b2f98889, - 0x39a86a998d1ce1b9, - 0x1f888e0ce473465a, - 0x7899568376978716, - 0x02cf2ad7ee2341bf, - 0x85c713b5b3f1a14e, - 0xff916fe12b4567e7, - 0x7c1a0230b7d10575, - 0x0c98fcc85eca9ba5, - 0xa3e7f720da9e06ad, - 0x6a6031a2bbb1f438, - 0x973e74947ed7d260, - 0x2cf4663918c0ff9a, - 0x5f50a7f368678e24, - 0x34d983b4a449d4cd, - 0x68af1b755592b587, - 0x7f3c3d022e6dea1b, - 0xabfc5f5b45121f6b, - 0x0d71e92d29553574, - 0xdffdf5106d4f03d8, - 0x081ba87b9f8c19c6, - 0xdb7ea1a3ac0981bb, - 0xbbca12ad66172dfa, - 0x79704366010829c7, - 0x179326777bff5f9c, - 0x0000000000000000, - 0xeb2476a4c906d715, - 0x724dd42f0738df6f, - 0xb752ee6538ddb65f, - 0x37ffbc863df53ba3, - 0x8efa84fcb5c157e6, - 0xe9eb5c73272596aa, - 0x1b0bdabf2535c439, - 0x86e12c872a4d4e20, - 0x9969a28bce3e087a, - 0xfafb2eb79d9c4b55, - 0x056a4156b6d92cb2, - 0x5a3ae6a5debea296, - 0x22a3b026a8292580, - 0x53c85b3b36ad1581, - 0xb11e900117b87583, - 0xc51f3a4a3fe56930, - 0xe019e1edcf3621bd, - 0xec811d2591fcba18, - 0x445b7d4c4d524a1d, - 
0xa8da6069dcaef005, - 0x58f5cc72309de329, - 0xd4c062596b7ff570, - 0xce22ad0339d59f98, - 0x591cd99747024df8, - 0x8b90c5aa03187b54, - 0xf663d27fc356d0f0, - 0xd8589e9135b56ed5, - 0x35309651d3d67a1c, - 0x12f96721cd26732e, - 0xd28c1c3d441a36ac, - 0x492a946164077f69, - 0x2d1d73dc6f5f514b, - 0x6f0a70f40d68d88a, - 0x60b4b30eca1eac41, - 0xd36509d83385987d, - 0x0b3d97490630f6a8, - 0x9eccc90a96c46577, - 0xa20ee2c5ad01a87c, - 0xe49ab55e0e70a3de, - 0xa4429ca182646ba0, - 0xda97b446db962f6a, - 0xcced87d4d7f6de27, - 0x2ab8185d37a53c46, - 0x9f25dcefe15bcba6, - 0xc19c6ef9fea3eb53, - 0xa764a3931bd884ce, - 0x2fd2590b817c10f4, - 0x56a21a6d80743933, - 0xe573a0bb79ef0d0f, - 0x155c0ca095dc1e23, - 0x6c2c4fc694d437e4, - 0x10364df623053291, - 0xdd32dfc7836c4267, - 0x03263f3299bcef6e, - 0x66f8cd6ae57b6f9d, - 0x8c35ae2b5be21659, - 0x31b3c2e21290f87f, - 0x93bd2027bf915003, - 0x69460e90220d1b56, - 0x299e276fae19d328, - 0x63928c3c53a2432f, - 0x7082fef8e91b9ed0, - 0xbc6f792c3eed40f7, - 0x4c40d537d2de53db, - 0x75e8bfae5fc2b262, - 0x4da9c0d2a541fd0a, - 0x4e8fffe03cfd1264, - 0x2620e495696fa7e3, - 0xe1f0f408b8a98f6c, - 0xd1aa230fdda6d9c2, - 0xc7d0109dd1c6288f, - 0x8a79d04f7487d585, - 0x4694579ba3710ba2, - 0x38417f7cfa834f68, - 0x1d47a4db0a5007e5, - 0x206c9af1460a643f, - 0xa128ddf734bd4712, - 0x8144470672b7232d, - 0xf2e086cc02105293, - 0x182de58dbc892b57, - 0xcaa1f9b0f8931dfb, - 0x6b892447cc2e5ae9, - 0xf9dd11850420a43b, - 0x4be5beb68a243ed6, - 0x5584255f19c8d65d, - 0x3b67404e633fa006, - 0xa68db6766c472a1f, - 0xf78ac79ab4c97e21, - 0xc353442e1080aaec, - 0x9a4f9db95782e714, - }, - { - 0xc811a8058c3f55de, - 0x65f5b43196b50619, - 0xf74f96b1d6706e43, - 0x859d1e8bcb43d336, - 0x5aab8a85ccfa3d84, - 0xf9c7bf99c295fcfd, - 0xa21fd5a1de4b630f, - 0xcdb3ef763b8b456d, - 0x803f59f87cf7c385, - 0xb27c73be5f31913c, - 0x98e3ac6633b04821, - 0xbf61674c26b8f818, - 0x0ffbc995c4c130c8, - 0xaaa0862010761a98, - 0x6057f342210116aa, - 0xf63c760c0654cc35, - 0x2ddb45cc667d9042, - 0xbcf45a964bd40382, - 0x68e8a0c3ef3c6f3d, - 
0xa7bd92d269ff73bc, - 0x290ae20201ed2287, - 0xb7de34cde885818f, - 0xd901eea7dd61059b, - 0xd6fa273219a03553, - 0xd56f1ae874cccec9, - 0xea31245c2e83f554, - 0x7034555da07be499, - 0xce26d2ac56e7bef7, - 0xfd161857a5054e38, - 0x6a0e7da4527436d1, - 0x5bd86a381cde9ff2, - 0xcaf7756231770c32, - 0xb09aaed9e279c8d0, - 0x5def1091c60674db, - 0x111046a2515e5045, - 0x23536ce4729802fc, - 0xc50cbcf7f5b63cfa, - 0x73a16887cd171f03, - 0x7d2941afd9f28dbd, - 0x3f5e3eb45a4f3b9d, - 0x84eefe361b677140, - 0x3db8e3d3e7076271, - 0x1a3a28f9f20fd248, - 0x7ebc7c75b49e7627, - 0x74e5f293c7eb565c, - 0x18dcf59e4f478ba4, - 0x0c6ef44fa9adcb52, - 0xc699812d98dac760, - 0x788b06dc6e469d0e, - 0xfc65f8ea7521ec4e, - 0x30a5f7219e8e0b55, - 0x2bec3f65bca57b6b, - 0xddd04969baf1b75e, - 0x99904cdbe394ea57, - 0x14b201d1e6ea40f6, - 0xbbb0c08241284add, - 0x50f20463bf8f1dff, - 0xe8d7f93b93cbacb8, - 0x4d8cb68e477c86e8, - 0xc1dd1b3992268e3f, - 0x7c5aa11209d62fcb, - 0x2f3d98abdb35c9ae, - 0x671369562bfd5ff5, - 0x15c1e16c36cee280, - 0x1d7eb2edf8f39b17, - 0xda94d37db00dfe01, - 0x877bc3ec760b8ada, - 0xcb8495dfe153ae44, - 0x05a24773b7b410b3, - 0x12857b783c32abdf, - 0x8eb770d06812513b, - 0x536739b9d2e3e665, - 0x584d57e271b26468, - 0xd789c78fc9849725, - 0xa935bbfa7d1ae102, - 0x8b1537a3dfa64188, - 0xd0cd5d9bc378de7a, - 0x4ac82c9a4d80cfb7, - 0x42777f1b83bdb620, - 0x72d2883a1d33bd75, - 0x5e7a2d4bab6a8f41, - 0xf4daab6bbb1c95d9, - 0x905cffe7fd8d31b6, - 0x83aa6422119b381f, - 0xc0aefb8442022c49, - 0xa0f908c663033ae3, - 0xa428af0804938826, - 0xade41c341a8a53c7, - 0xae7121ee77e6a85d, - 0xc47f5c4a25929e8c, - 0xb538e9aa55cdd863, - 0x06377aa9dad8eb29, - 0xa18ae87bb3279895, - 0x6edfda6a35e48414, - 0x6b7d9d19825094a7, - 0xd41cfa55a4e86cbf, - 0xe5caedc9ea42c59c, - 0xa36c351c0e6fc179, - 0x5181e4de6fabbf89, - 0xfff0c530184d17d4, - 0x9d41eb1584045892, - 0x1c0d525028d73961, - 0xf178ec180ca8856a, - 0x9a0571018ef811cd, - 0x4091a27c3ef5efcc, - 0x19af15239f6329d2, - 0x347450eff91eb990, - 0xe11b4a078dd27759, - 0xb9561de5fc601331, - 0x912f1f5a2da993c0, 
- 0x1654dcb65ba2191a, - 0x3e2dde098a6b99eb, - 0x8a66d71e0f82e3fe, - 0x8c51adb7d55a08d7, - 0x4533e50f8941ff7f, - 0x02e6dd67bd4859ec, - 0xe068aaba5df6d52f, - 0xc24826e3ff4a75a5, - 0x6c39070d88acddf8, - 0x6486548c4691a46f, - 0xd1bebd26135c7c0c, - 0xb30f93038f15334a, - 0x82d9849fc1bf9a69, - 0x9c320ba85420fae4, - 0xfa528243aff90767, - 0x9ed4d6cfe968a308, - 0xb825fd582c44b147, - 0x9b7691bc5edcb3bb, - 0xc7ea619048fe6516, - 0x1063a61f817af233, - 0x47d538683409a693, - 0x63c2ce984c6ded30, - 0x2a9fdfd86c81d91d, - 0x7b1e3b06032a6694, - 0x666089ebfbd9fd83, - 0x0a598ee67375207b, - 0x07449a140afc495f, - 0x2ca8a571b6593234, - 0x1f986f8a45bbc2fb, - 0x381aa4a050b372c2, - 0x5423a3add81faf3a, - 0x17273c0b8b86bb6c, - 0xfe83258dc869b5a2, - 0x287902bfd1c980f1, - 0xf5a94bd66b3837af, - 0x88800a79b2caba12, - 0x55504310083b0d4c, - 0xdf36940e07b9eeb2, - 0x04d1a7ce6790b2c5, - 0x612413fff125b4dc, - 0x26f12b97c52c124f, - 0x86082351a62f28ac, - 0xef93632f9937e5e7, - 0x3507b052293a1be6, - 0xe72c30ae570a9c70, - 0xd3586041ae1425e0, - 0xde4574b3d79d4cc4, - 0x92ba228040c5685a, - 0xf00b0ca5dc8c271c, - 0xbe1287f1f69c5a6e, - 0xf39e317fb1e0dc86, - 0x495d114020ec342d, - 0x699b407e3f18cd4b, - 0xdca3a9d46ad51528, - 0x0d1d14f279896924, - 0x0000000000000000, - 0x593eb75fa196c61e, - 0x2e4e78160b116bd8, - 0x6d4ae7b058887f8e, - 0xe65fd013872e3e06, - 0x7a6ddbbbd30ec4e2, - 0xac97fc89caaef1b1, - 0x09ccb33c1e19dbe1, - 0x89f3eac462ee1864, - 0x7770cf49aa87adc6, - 0x56c57eca6557f6d6, - 0x03953dda6d6cfb9a, - 0x36928d884456e07c, - 0x1eeb8f37959f608d, - 0x31d6179c4eaaa923, - 0x6fac3ad7e5c02662, - 0x43049fa653991456, - 0xabd3669dc052b8ee, - 0xaf02c153a7c20a2b, - 0x3ccb036e3723c007, - 0x93c9c23d90e1ca2c, - 0xc33bc65e2f6ed7d3, - 0x4cff56339758249e, - 0xb1e94e64325d6aa6, - 0x37e16d359472420a, - 0x79f8e661be623f78, - 0x5214d90402c74413, - 0x482ef1fdf0c8965b, - 0x13f69bc5ec1609a9, - 0x0e88292814e592be, - 0x4e198b542a107d72, - 0xccc00fcbebafe71b, - 0x1b49c844222b703e, - 0x2564164da840e9d5, - 0x20c6513e1ff4f966, - 
0xbac3203f910ce8ab, - 0xf2edd1c261c47ef0, - 0x814cb945acd361f3, - 0x95feb8944a392105, - 0x5c9cf02c1622d6ad, - 0x971865f3f77178e9, - 0xbd87ba2b9bf0a1f4, - 0x444005b259655d09, - 0xed75be48247fbc0b, - 0x7596122e17cff42a, - 0xb44b091785e97a15, - 0x966b854e2755da9f, - 0xeee0839249134791, - 0x32432a4623c652b9, - 0xa8465b47ad3e4374, - 0xf8b45f2412b15e8b, - 0x2417f6f078644ba3, - 0xfb2162fe7fdda511, - 0x4bbbcc279da46dc1, - 0x0173e0bdd024a276, - 0x22208c59a2bca08a, - 0x8fc4906db836f34d, - 0xe4b90d743a6667ea, - 0x7147b5e0705f46ef, - 0x2782cb2a1508b039, - 0xec065ef5f45b1e7d, - 0x21b5b183cfd05b10, - 0xdbe733c060295c77, - 0x9fa73672394c017e, - 0xcf55321186c31c81, - 0xd8720e1a0d45a7ed, - 0x3b8f997a3ddf8958, - 0x3afc79c7edfb2b2e, - 0xe9a4198643ef0ece, - 0x5f09cdf67b4e2d37, - 0x4f6a6be9fa34df04, - 0xb6add47038a123f9, - 0x8d224d0a057eaaa1, - 0xc96248b85c1bf7a8, - 0xe3fd9760309a2eb5, - 0x0b2a6e5ba351820d, - 0xeb42c4e1fea75722, - 0x948d58299a1d8373, - 0x7fcf9cc864bad451, - 0xa55b4fb5d4b72a50, - 0x08bf5381ce3d7997, - 0x46a6d8d5e42d04e5, - 0xd22b80fc7e308796, - 0x57b69e77b57354a0, - 0x3969441d8097d0b4, - 0x3330cafbf3e2f0cf, - 0xe28e77dde0be8cc3, - 0x62b12e259c494f46, - 0xa6ce726fb9dbd1ca, - 0x41e242c1eed14dba, - 0x76032ff47aa30fb0, - }, - { - 0xe6f87e5c5b711fd0, - 0x258377800924fa16, - 0xc849e07e852ea4a8, - 0x5b4686a18f06c16a, - 0x0b32e9a2d77b416e, - 0xabda37a467815c66, - 0xf61796a81a686676, - 0xf5dc0b706391954b, - 0x4862f38db7e64bf1, - 0xff5c629a68bd85c5, - 0xcb827da6fcd75795, - 0x66d36daf69b9f089, - 0x356c9f74483d83b0, - 0x7cbcecb1238c99a1, - 0x36a702ac31c4708d, - 0x9eb6a8d02fbcdfd6, - 0x8b19fa51e5b3ae37, - 0x9ccfb5408a127d0b, - 0xbc0c78b508208f5a, - 0xe533e3842288eced, - 0xcec2c7d377c15fd2, - 0xec7817b6505d0f5e, - 0xb94cc2c08336871d, - 0x8c205db4cb0b04ad, - 0x763c855b28a0892f, - 0x588d1b79f6ff3257, - 0x3fecf69e4311933e, - 0x0fc0d39f803a18c9, - 0xee010a26f5f3ad83, - 0x10efe8f4411979a6, - 0x5dcda10c7de93a10, - 0x4a1bee1d1248e92c, - 0x53bff2db21847339, - 0xb4f50ccfa6a23d09, - 
0x5fb4bc9cd84798cd, - 0xe88a2d8b071c56f9, - 0x7f7771695a756a9c, - 0xc5f02e71a0ba1ebc, - 0xa663f9ab4215e672, - 0x2eb19e22de5fbb78, - 0x0db9ce0f2594ba14, - 0x82520e6397664d84, - 0x2f031e6a0208ea98, - 0x5c7f2144a1be6bf0, - 0x7a37cb1cd16362db, - 0x83e08e2b4b311c64, - 0xcf70479bab960e32, - 0x856ba986b9dee71e, - 0xb5478c877af56ce9, - 0xb8fe42885f61d6fd, - 0x1bdd0156966238c8, - 0x622157923ef8a92e, - 0xfc97ff42114476f8, - 0x9d7d350856452ceb, - 0x4c90c9b0e0a71256, - 0x2308502dfbcb016c, - 0x2d7a03faa7a64845, - 0xf46e8b38bfc6c4ab, - 0xbdbef8fdd477deba, - 0x3aac4cebc8079b79, - 0xf09cb105e8879d0c, - 0x27fa6a10ac8a58cb, - 0x8960e7c1401d0cea, - 0x1a6f811e4a356928, - 0x90c4fb0773d196ff, - 0x43501a2f609d0a9f, - 0xf7a516e0c63f3796, - 0x1ce4a6b3b8da9252, - 0x1324752c38e08a9b, - 0xa5a864733bec154f, - 0x2bf124575549b33f, - 0xd766db15440dc5c7, - 0xa7d179e39e42b792, - 0xdadf151a61997fd3, - 0x86a0345ec0271423, - 0x38d5517b6da939a4, - 0x6518f077104003b4, - 0x02791d90a5aea2dd, - 0x88d267899c4a5d0a, - 0x930f66df0a2865c2, - 0x4ee9d4204509b08b, - 0x325538916685292a, - 0x412907bfc533a842, - 0xb27e2b62544dc673, - 0x6c5304456295e007, - 0x5af406e95351908a, - 0x1f2f3b6bc123616f, - 0xc37b09dc5255e5c6, - 0x3967d133b1fe6844, - 0x298839c7f0e711e2, - 0x409b87f71964f9a2, - 0xe938adc3db4b0719, - 0x0c0b4e47f9c3ebf4, - 0x5534d576d36b8843, - 0x4610a05aeb8b02d8, - 0x20c3cdf58232f251, - 0x6de1840dbec2b1e7, - 0xa0e8de06b0fa1d08, - 0x7b854b540d34333b, - 0x42e29a67bcca5b7f, - 0xd8a6088ac437dd0e, - 0xc63bb3a9d943ed81, - 0x21714dbd5e65a3b1, - 0x6761ede7b5eea169, - 0x2431f7c8d573abf6, - 0xd51fc685e1a3671a, - 0x5e063cd40410c92d, - 0x283ab98f2cb04002, - 0x8febc06cb2f2f790, - 0x17d64f116fa1d33c, - 0xe07359f1a99ee4aa, - 0x784ed68c74cdc006, - 0x6e2a19d5c73b42da, - 0x8712b4161c7045c3, - 0x371582e4ed93216d, - 0xace390414939f6fc, - 0x7ec5f12186223b7c, - 0xc0b094042bac16fb, - 0xf9d745379a527ebf, - 0x737c3f2ea3b68168, - 0x33e7b8d9bad278ca, - 0xa9a32a34c22ffebb, - 0xe48163ccfedfbd0d, - 0x8e5940246ea5a670, - 0x51c6ef4b842ad1e4, 
- 0x22bad065279c508c, - 0xd91488c218608cee, - 0x319ea5491f7cda17, - 0xd394e128134c9c60, - 0x094bf43272d5e3b3, - 0x9bf612a5a4aad791, - 0xccbbda43d26ffd0f, - 0x34de1f3c946ad250, - 0x4f5b5468995ee16b, - 0xdf9faf6fea8f7794, - 0x2648ea5870dd092b, - 0xbfc7e56d71d97c67, - 0xdde6b2ff4f21d549, - 0x3c276b463ae86003, - 0x91767b4faf86c71f, - 0x68a13e7835d4b9a0, - 0xb68c115f030c9fd4, - 0x141dd2c916582001, - 0x983d8f7ddd5324ac, - 0x64aa703fcc175254, - 0xc2c989948e02b426, - 0x3e5e76d69f46c2de, - 0x50746f03587d8004, - 0x45db3d829272f1e5, - 0x60584a029b560bf3, - 0xfbae58a73ffcdc62, - 0xa15a5e4e6cad4ce8, - 0x4ba96e55ce1fb8cc, - 0x08f9747aae82b253, - 0xc102144cf7fb471b, - 0x9f042898f3eb8e36, - 0x068b27adf2effb7a, - 0xedca97fe8c0a5ebe, - 0x778e0513f4f7d8cf, - 0x302c2501c32b8bf7, - 0x8d92ddfc175c554d, - 0xf865c57f46052f5f, - 0xeaf3301ba2b2f424, - 0xaa68b7ecbbd60d86, - 0x998f0f350104754c, - 0x0000000000000000, - 0xf12e314d34d0ccec, - 0x710522be061823b5, - 0xaf280d9930c005c1, - 0x97fd5ce25d693c65, - 0x19a41cc633cc9a15, - 0x95844172f8c79eb8, - 0xdc5432b7937684a9, - 0x9436c13a2490cf58, - 0x802b13f332c8ef59, - 0xc442ae397ced4f5c, - 0xfa1cd8efe3ab8d82, - 0xf2e5ac954d293fd1, - 0x6ad823e8907a1b7d, - 0x4d2249f83cf043b6, - 0x03cb9dd879f9f33d, - 0xde2d2f2736d82674, - 0x2a43a41f891ee2df, - 0x6f98999d1b6c133a, - 0xd4ad46cd3df436fa, - 0xbb35df50269825c0, - 0x964fdcaa813e6d85, - 0xeb41b0537ee5a5c4, - 0x0540ba758b160847, - 0xa41ae43be7bb44af, - 0xe3b8c429d0671797, - 0x819993bbee9fbeb9, - 0xae9a8dd1ec975421, - 0xf3572cdd917e6e31, - 0x6393d7dae2aff8ce, - 0x47a2201237dc5338, - 0xa32343dec903ee35, - 0x79fc56c4a89a91e6, - 0x01b28048dc5751e0, - 0x1296f564e4b7db7b, - 0x75f7188351597a12, - 0xdb6d9552bdce2e33, - 0x1e9dbb231d74308f, - 0x520d7293fdd322d9, - 0xe20a44610c304677, - 0xfeeee2d2b4ead425, - 0xca30fdee20800675, - 0x61eaca4a47015a13, - 0xe74afe1487264e30, - 0x2cc883b27bf119a5, - 0x1664cf59b3f682dc, - 0xa811aa7c1e78af5b, - 0x1d5626fb648dc3b2, - 0xb73e9117df5bce34, - 0xd05f7cf06ab56f5d, - 
0xfd257f0acd132718, - 0x574dc8e676c52a9e, - 0x0739a7e52eb8aa9a, - 0x5486553e0f3cd9a3, - 0x56ff48aeaa927b7e, - 0xbe756525ad8e2d87, - 0x7d0e6cf9ffdbc841, - 0x3b1ecca31450ca99, - 0x6913be30e983e840, - 0xad511009956ea71c, - 0xb1b5b6ba2db4354e, - 0x4469bdca4e25a005, - 0x15af5281ca0f71e1, - 0x744598cb8d0e2bf2, - 0x593f9b312aa863b7, - 0xefb38a6e29a4fc63, - 0x6b6aa3a04c2d4a9d, - 0x3d95eb0ee6bf31e3, - 0xa291c3961554bfd5, - 0x18169c8eef9bcbf5, - 0x115d68bc9d4e2846, - 0xba875f18facf7420, - 0xd1edfcb8b6e23ebd, - 0xb00736f2f1e364ae, - 0x84d929ce6589b6fe, - 0x70b7a2f6da4f7255, - 0x0e7253d75c6d4929, - 0x04f23a3d574159a7, - 0x0a8069ea0b2c108e, - 0x49d073c56bb11a11, - 0x8aab7a1939e4ffd7, - 0xcd095a0b0e38acef, - 0xc9fb60365979f548, - 0x92bde697d67f3422, - 0xc78933e10514bc61, - 0xe1c1d9b975c9b54a, - 0xd2266160cf1bcd80, - 0x9a4492ed78fd8671, - 0xb3ccab2a881a9793, - 0x72cebf667fe1d088, - 0xd6d45b5d985a9427, - }, -}; - -__device__ __constant__ u64 sbob_rc64[12][8] = -{ - { - 0xe9daca1eda5b08b1, - 0x1f7c65c0812fcbeb, - 0x16d0452e43766a2f, - 0xfcc485758db84e71, - 0x0169679291e07c4b, - 0x15d360a4082a42a2, - 0x234d74cc36747605, - 0x0745a6f2596580dd, - }, - { - 0x1a2f9da98ab5a36f, - 0xd7b5700f469de34f, - 0x982b230a72eafef3, - 0x3101b5160f5ed561, - 0x5899d6126b17b59a, - 0xcaa70adbc261b55c, - 0x56cdcbd71ba2dd55, - 0xb79bb121700479e6, - }, - { - 0xc72fce2bacdc74f5, - 0x35843d6a28fc390a, - 0x8b1f9c525f5ef106, - 0x7b7b29b11475eaf2, - 0xb19e3590e40fe2d3, - 0x09db6260373ac9c1, - 0x31db7a8643f4b6c2, - 0xb20aba0af5961e99, - }, - { - 0xd26615e8b3df1fef, - 0xdde4715da0e148f9, - 0x7d3c5c337e858e48, - 0x3f355e68ad1c729d, - 0x75d603ed822cd7a9, - 0xbe0352933313b7d8, - 0xf137e893a1ea5334, - 0x2ed1e384bcbe0c22, - }, - { - 0x994747adac6bea4b, - 0x6323a96c0c413f9a, - 0x4a1086161f1c157f, - 0xbdff0f80d7359e35, - 0xa3f53a254717cdbf, - 0x161a2723b700ffdf, - 0xf563eaa97ea2567a, - 0x57fe6c7cfd581760, - }, - { - 0xd9d33a1daeae4fae, - 0xc039307a3bc3a46f, - 0x6ca44251f9c4662d, - 0xc68ef09ab49a7f18, - 
0xb4b79a1cb7a6facf, - 0xb6c6bec2661ff20a, - 0x354f903672c571bf, - 0x6e7d64467a4068fa, - }, - { - 0xecc5aaee160ec7f4, - 0x540924bffe86ac51, - 0xc987bfe6c7c69e39, - 0xc9937a19333e47d3, - 0x372c822dc5ab9209, - 0x04054a2883694706, - 0xf34a3ca24c451735, - 0x93d4143a4d568688, - }, - { - 0xa7c9934d425b1f9b, - 0x41416e0c02aae703, - 0x1ede369c71f8b74e, - 0x9ac4db4d3b44b489, - 0x90069b92cb2b89f4, - 0x2fc4a5d12b8dd169, - 0xd9a8515935c2ac36, - 0x1ee702bfd40d7fa4, - }, - { - 0x9b223116545a8f37, - 0xde5f16ecd89a4c94, - 0x244289251b3a7d3a, - 0x84090de0b755d93c, - 0xb1ceb2db0b440a80, - 0x549c07a69a8a2b7b, - 0x602a1fcb92dc380e, - 0xdb5a238351446172, - }, - { - 0x526f0580a6debeab, - 0xf3f3e4b248e52a38, - 0xdb788aff1ce74189, - 0x0361331b8ae1ff1f, - 0x4b3369af0267e79f, - 0xf452763b306c1e7a, - 0xc3b63b15d1fa9836, - 0xed9c4598fbc7b474, - }, - { - 0xfb89c8efd09ecd7b, - 0x94fe5a63cdc60230, - 0x6107abebbb6bfad8, - 0x7966841421800120, - 0xcab948eaef711d8a, - 0x986e477d1dcdbaef, - 0x5dd86fc04a59a2de, - 0x1b2df381cda4ca6b, - }, - { - 0xba3116f167e78e37, - 0x7ab14904b08013d2, - 0x771ddfbc323ca4cd, - 0x9b9f2130d41220f8, - 0x86cc91189def805d, - 0x5228e188aaa41de7, - 0x991bb2d9d517f4fa, - 0x20d71bf14a92bc48, - }, -}; - -__device__ static void streebog_g (u64 h[8], const u64 m[8], u64 s_sbob_sl64[8][256]) -{ - u64 k[8]; - u64 s[8]; - u64 t[8]; - - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = h[i]; - } - - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = m[i]; - } - - for (int r = 0; r < 12; r++) - { - #pragma unroll - for (int i = 0; i < 8; i++) - { - t[i] = s[i] ^ k[i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - s[i] = SBOG_LPSti64; - } - - for (int i = 0; i < 8; i++) - { - t[i] = k[i] ^ sbob_rc64[r][i]; - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - k[i] = SBOG_LPSti64; - } - } - - #pragma unroll - for (int i = 0; i < 8; i++) - { - h[i] ^= s[i] ^ k[i] ^ m[i]; - } -} - -__device__ 
__constant__ bf_t c_bfs[1024]; - -__device__ static void m11800m (u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * loop - */ - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w[0] = w0l | w0r; - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] 
= 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_M - } -} - -__device__ static void m11800s (u64 s_sbob_sl64[8][256], u32 w[16], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w[0] = w0l | w0r; - - /** - * reverse message block - */ - - u64 m[8]; - - m[0] = hl32_to_64 (w[15], w[14]); - m[1] = hl32_to_64 (w[13], w[12]); - m[2] = hl32_to_64 (w[11], w[10]); - m[3] = hl32_to_64 (w[ 9], w[ 8]); - m[4] = hl32_to_64 (w[ 7], w[ 6]); - m[5] = hl32_to_64 (w[ 5], w[ 4]); - m[6] = hl32_to_64 (w[ 3], w[ 2]); - m[7] = 
hl32_to_64 (w[ 1], w[ 0]); - - m[0] = swap_workaround (m[0]); - m[1] = swap_workaround (m[1]); - m[2] = swap_workaround (m[2]); - m[3] = swap_workaround (m[3]); - m[4] = swap_workaround (m[4]); - m[5] = swap_workaround (m[5]); - m[6] = swap_workaround (m[6]); - m[7] = swap_workaround (m[7]); - - // state buffer (hash) - - u64 h[8]; - - h[0] = INITVAL; - h[1] = INITVAL; - h[2] = INITVAL; - h[3] = INITVAL; - h[4] = INITVAL; - h[5] = INITVAL; - h[6] = INITVAL; - h[7] = INITVAL; - - streebog_g (h, m, s_sbob_sl64); - - u64 z[8]; - - z[0] = 0; - z[1] = 0; - z[2] = 0; - z[3] = 0; - z[4] = 0; - z[5] = 0; - z[6] = 0; - z[7] = swap_workaround ((u64) (pw_len * 8)); - - streebog_g (h, z, s_sbob_sl64); - streebog_g (h, m, s_sbob_sl64); - - const u32 r0 = l32_from_64 (h[0]); - const u32 r1 = h32_from_64 (h[0]); - const u32 r2 = l32_from_64 (h[1]); - const u32 r3 = h32_from_64 (h[1]); - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] 
= 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; 
- } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 
+ 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, 
salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] 
= sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800m (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s04 (const pw_t 
*pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - 
s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 
*hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = 
sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11800_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
-{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * shared lookup table - */ - - __shared__ u64 s_sbob_sl64[8][256]; - - if (lid < 64) - { - const u32 lid4 = lid * 4; - - s_sbob_sl64[0][lid4 + 0] = sbob_sl64[0][lid4 + 0]; - s_sbob_sl64[0][lid4 + 1] = sbob_sl64[0][lid4 + 1]; - s_sbob_sl64[0][lid4 + 2] = sbob_sl64[0][lid4 + 2]; - s_sbob_sl64[0][lid4 + 3] = sbob_sl64[0][lid4 + 3]; - s_sbob_sl64[1][lid4 + 0] = sbob_sl64[1][lid4 + 0]; - s_sbob_sl64[1][lid4 + 1] = sbob_sl64[1][lid4 + 1]; - s_sbob_sl64[1][lid4 + 2] = sbob_sl64[1][lid4 + 2]; - s_sbob_sl64[1][lid4 + 3] = sbob_sl64[1][lid4 + 3]; - s_sbob_sl64[2][lid4 + 0] = sbob_sl64[2][lid4 + 0]; - s_sbob_sl64[2][lid4 + 1] = sbob_sl64[2][lid4 + 1]; - s_sbob_sl64[2][lid4 + 2] = sbob_sl64[2][lid4 + 2]; - s_sbob_sl64[2][lid4 + 3] = sbob_sl64[2][lid4 + 3]; - s_sbob_sl64[3][lid4 + 0] = sbob_sl64[3][lid4 + 0]; - s_sbob_sl64[3][lid4 + 1] = sbob_sl64[3][lid4 + 1]; - s_sbob_sl64[3][lid4 + 2] = sbob_sl64[3][lid4 + 2]; - s_sbob_sl64[3][lid4 + 3] = sbob_sl64[3][lid4 + 3]; - s_sbob_sl64[4][lid4 + 0] = sbob_sl64[4][lid4 + 0]; - s_sbob_sl64[4][lid4 + 1] = sbob_sl64[4][lid4 + 1]; - s_sbob_sl64[4][lid4 + 2] = sbob_sl64[4][lid4 + 2]; - s_sbob_sl64[4][lid4 + 3] = sbob_sl64[4][lid4 + 3]; - s_sbob_sl64[5][lid4 + 0] = sbob_sl64[5][lid4 + 0]; - s_sbob_sl64[5][lid4 + 1] = sbob_sl64[5][lid4 + 1]; - s_sbob_sl64[5][lid4 + 2] = sbob_sl64[5][lid4 + 2]; - s_sbob_sl64[5][lid4 + 3] = sbob_sl64[5][lid4 + 3]; - 
s_sbob_sl64[6][lid4 + 0] = sbob_sl64[6][lid4 + 0]; - s_sbob_sl64[6][lid4 + 1] = sbob_sl64[6][lid4 + 1]; - s_sbob_sl64[6][lid4 + 2] = sbob_sl64[6][lid4 + 2]; - s_sbob_sl64[6][lid4 + 3] = sbob_sl64[6][lid4 + 3]; - s_sbob_sl64[7][lid4 + 0] = sbob_sl64[7][lid4 + 0]; - s_sbob_sl64[7][lid4 + 1] = sbob_sl64[7][lid4 + 1]; - s_sbob_sl64[7][lid4 + 2] = sbob_sl64[7][lid4 + 2]; - s_sbob_sl64[7][lid4 + 3] = sbob_sl64[7][lid4 + 3]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m11800s (s_sbob_sl64, w, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m11900.cu b/nv/m11900.cu deleted file mode 100644 index 718bc4a..0000000 --- a/nv/m11900.cu +++ /dev/null @@ -1,417 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PBKDF2_MD5_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static void md5_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = 
w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - u32x tmp2; - - MD5_STEP (MD5_Fo, a, b, c, d, w0_t, MD5C00, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w1_t, MD5C01, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w2_t, MD5C02, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w3_t, MD5C03, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w4_t, MD5C04, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w5_t, MD5C05, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, w6_t, MD5C06, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, w7_t, MD5C07, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, w8_t, MD5C08, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, w9_t, MD5C09, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, wa_t, MD5C0a, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wb_t, MD5C0b, MD5S03); - MD5_STEP (MD5_Fo, a, b, c, d, wc_t, MD5C0c, MD5S00); - MD5_STEP (MD5_Fo, d, a, b, c, wd_t, MD5C0d, MD5S01); - MD5_STEP (MD5_Fo, c, d, a, b, we_t, MD5C0e, MD5S02); - MD5_STEP (MD5_Fo, b, c, d, a, wf_t, MD5C0f, MD5S03); - - MD5_STEP (MD5_Go, a, b, c, d, w1_t, MD5C10, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w6_t, MD5C11, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wb_t, MD5C12, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w0_t, MD5C13, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w5_t, MD5C14, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, wa_t, MD5C15, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, wf_t, MD5C16, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w4_t, MD5C17, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, w9_t, MD5C18, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, we_t, MD5C19, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w3_t, MD5C1a, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, w8_t, MD5C1b, MD5S13); - MD5_STEP (MD5_Go, a, b, c, d, wd_t, MD5C1c, MD5S10); - MD5_STEP (MD5_Go, d, a, b, c, w2_t, MD5C1d, MD5S11); - MD5_STEP (MD5_Go, c, d, a, b, w7_t, MD5C1e, MD5S12); - MD5_STEP (MD5_Go, b, c, d, a, wc_t, MD5C1f, MD5S13); - - MD5_STEP (MD5_H1, a, b, c, d, w5_t, MD5C20, MD5S20); - MD5_STEP (MD5_H2, d, a, b, 
c, w8_t, MD5C21, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wb_t, MD5C22, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, we_t, MD5C23, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w1_t, MD5C24, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w4_t, MD5C25, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w7_t, MD5C26, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, wa_t, MD5C27, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, wd_t, MD5C28, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, w0_t, MD5C29, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, w3_t, MD5C2a, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w6_t, MD5C2b, MD5S23); - MD5_STEP (MD5_H1, a, b, c, d, w9_t, MD5C2c, MD5S20); - MD5_STEP (MD5_H2, d, a, b, c, wc_t, MD5C2d, MD5S21); - MD5_STEP (MD5_H1, c, d, a, b, wf_t, MD5C2e, MD5S22); - MD5_STEP (MD5_H2, b, c, d, a, w2_t, MD5C2f, MD5S23); - - MD5_STEP (MD5_I , a, b, c, d, w0_t, MD5C30, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w7_t, MD5C31, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, we_t, MD5C32, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w5_t, MD5C33, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, wc_t, MD5C34, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, w3_t, MD5C35, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, wa_t, MD5C36, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w1_t, MD5C37, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w8_t, MD5C38, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wf_t, MD5C39, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w6_t, MD5C3a, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, wd_t, MD5C3b, MD5S33); - MD5_STEP (MD5_I , a, b, c, d, w4_t, MD5C3c, MD5S30); - MD5_STEP (MD5_I , d, a, b, c, wb_t, MD5C3d, MD5S31); - MD5_STEP (MD5_I , c, d, a, b, w2_t, MD5C3e, MD5S32); - MD5_STEP (MD5_I , b, c, d, a, w9_t, MD5C3f, MD5S33); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void hmac_md5_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - 
w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = MD5M_A; - ipad[1] = MD5M_B; - ipad[2] = MD5M_C; - ipad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = MD5M_A; - opad[1] = MD5M_B; - opad[2] = MD5M_C; - opad[3] = MD5M_D; - - md5_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_md5_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[4], u32x opad[4], u32x digest[4]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - - md5_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - - md5_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11900_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_md5_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_md5_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 esalt_buf0[4]; - u32 esalt_buf1[4]; - u32 esalt_buf2[4]; - u32 esalt_buf3[4]; - - esalt_buf0[0] = esalt_bufs[salt_pos].salt_buf[ 0]; - esalt_buf0[1] = esalt_bufs[salt_pos].salt_buf[ 1]; - esalt_buf0[2] = esalt_bufs[salt_pos].salt_buf[ 2]; - esalt_buf0[3] = esalt_bufs[salt_pos].salt_buf[ 3]; - esalt_buf1[0] = esalt_bufs[salt_pos].salt_buf[ 4]; - esalt_buf1[1] = esalt_bufs[salt_pos].salt_buf[ 5]; - esalt_buf1[2] = esalt_bufs[salt_pos].salt_buf[ 6]; - esalt_buf1[3] = esalt_bufs[salt_pos].salt_buf[ 7]; - esalt_buf2[0] = esalt_bufs[salt_pos].salt_buf[ 8]; - esalt_buf2[1] = esalt_bufs[salt_pos].salt_buf[ 9]; - esalt_buf2[2] = esalt_bufs[salt_pos].salt_buf[10]; - esalt_buf2[3] = 
esalt_bufs[salt_pos].salt_buf[11]; - esalt_buf3[0] = esalt_bufs[salt_pos].salt_buf[12]; - esalt_buf3[1] = esalt_bufs[salt_pos].salt_buf[13]; - esalt_buf3[2] = (64 + salt_len + 4) * 8; - esalt_buf3[3] = 0; - - u32 ipad[4]; - u32 opad[4]; - - hmac_md5_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - - for (u32 i = 0, j = 1; i < 4; i += 4, j += 1) - { - u32 dgst[4]; - - hmac_md5_run (esalt_buf0, esalt_buf1, esalt_buf2, esalt_buf3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11900_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_md5_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_md5_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 ipad[4]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - 
ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - - u32 opad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - - for (u32 i = 0; i < 4; i += 4) - { - u32 dgst[4]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - - u32 out[4]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = 0x80; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = (64 + 16) * 8; - w3[3] = 0; - - hmac_md5_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m11900_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_md5_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_md5_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12000.cu b/nv/m12000.cu deleted file mode 100644 index e947cd5..0000000 --- a/nv/m12000.cu +++ /dev/null @@ -1,460 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PBKDF2_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, 
w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP 
(SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP 
(SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP 
(SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = 
SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12000_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha1_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha1_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = swap_workaround (pws[gid].i[ 0]); - w0[1] = swap_workaround (pws[gid].i[ 1]); - w0[2] = swap_workaround (pws[gid].i[ 2]); - w0[3] = swap_workaround (pws[gid].i[ 3]); - - u32x w1[4]; - - w1[0] = 
swap_workaround (pws[gid].i[ 4]); - w1[1] = swap_workaround (pws[gid].i[ 5]); - w1[2] = swap_workaround (pws[gid].i[ 6]); - w1[3] = swap_workaround (pws[gid].i[ 7]); - - u32x w2[4]; - - w2[0] = swap_workaround (pws[gid].i[ 8]); - w2[1] = swap_workaround (pws[gid].i[ 9]); - w2[2] = swap_workaround (pws[gid].i[10]); - w2[3] = swap_workaround (pws[gid].i[11]); - - u32x w3[4]; - - w3[0] = swap_workaround (pws[gid].i[12]); - w3[1] = swap_workaround (pws[gid].i[13]); - w3[2] = swap_workaround (pws[gid].i[14]); - w3[3] = swap_workaround (pws[gid].i[15]); - - /** - * salt - */ - - const u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 esalt_buf0[4]; - u32 esalt_buf1[4]; - u32 esalt_buf2[4]; - u32 esalt_buf3[4]; - - esalt_buf0[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 0]); - esalt_buf0[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 1]); - esalt_buf0[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 2]); - esalt_buf0[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 3]); - esalt_buf1[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 4]); - esalt_buf1[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 5]); - esalt_buf1[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 6]); - esalt_buf1[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 7]); - esalt_buf2[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 8]); - esalt_buf2[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[ 9]); - esalt_buf2[2] = swap_workaround (esalt_bufs[salt_pos].salt_buf[10]); - esalt_buf2[3] = swap_workaround (esalt_bufs[salt_pos].salt_buf[11]); - esalt_buf3[0] = swap_workaround (esalt_bufs[salt_pos].salt_buf[12]); - esalt_buf3[1] = swap_workaround (esalt_bufs[salt_pos].salt_buf[13]); - esalt_buf3[2] = 0; - esalt_buf3[3] = (64 + salt_len + 4) * 8; - - u32 ipad[5]; - u32 opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - 
tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - for (u32 i = 0, j = 1; i < 5; i += 5, j += 1) - { - u32 dgst[5]; - - hmac_sha1_run (esalt_buf0, esalt_buf1, esalt_buf2, esalt_buf3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12000_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha1_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha1_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 ipad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - u32 opad[5]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - for (u32 i = 
0; i < 5; i += 5) - { - u32 dgst[5]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - - u32 out[5]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12000_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha1_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha1_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, 
const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12200.cu b/nv/m12200.cu deleted file mode 100644 index cd910ae..0000000 --- a/nv/m12200.cu +++ /dev/null @@ -1,335 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, 
SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, 
g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += f; - dgst[6] += g; - dgst[7] += h; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12200_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, ecryptfs_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if 
(gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - u32 pw_len = pws[gid].pw_len; - - append_0x80_4 (w0, w1, w2, w3, pw_len); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - /** - * salt - */ - - u32 s0[2]; - - s0[0] = salt_bufs[salt_pos].salt_buf[0]; - s0[1] = salt_bufs[salt_pos].salt_buf[1]; - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u64 w[16]; - - w[ 0] = hl32_to_64 (s0[0], s0[1]); - w[ 1] = hl32_to_64 (w0[0], w0[1]); - w[ 2] = hl32_to_64 (w0[2], w0[3]); - w[ 3] = hl32_to_64 (w1[0], w1[1]); - w[ 4] = hl32_to_64 (w1[2], w1[3]); - w[ 5] = hl32_to_64 (w2[0], w2[1]); - w[ 6] = hl32_to_64 (w2[2], w2[3]); - w[ 7] = hl32_to_64 (w3[0], w3[1]); - w[ 8] = hl32_to_64 (w3[2], w3[3]); - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (salt_len + pw_len) * 8; - - u64 dgst[8]; - - dgst[0] = SHA512M_A; - dgst[1] = SHA512M_B; - dgst[2] = SHA512M_C; - dgst[3] = SHA512M_D; - dgst[4] = SHA512M_E; - dgst[5] = SHA512M_F; - dgst[6] = SHA512M_G; - dgst[7] = SHA512M_H; - - 
sha512_transform (w, dgst); - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; - tmps[gid].out[5] = dgst[5]; - tmps[gid].out[6] = dgst[6]; - tmps[gid].out[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12200_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, ecryptfs_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 dgst[8]; - - dgst[0] = tmps[gid].out[0]; - dgst[1] = tmps[gid].out[1]; - dgst[2] = tmps[gid].out[2]; - dgst[3] = tmps[gid].out[3]; - dgst[4] = tmps[gid].out[4]; - dgst[5] = tmps[gid].out[5]; - dgst[6] = tmps[gid].out[6]; - dgst[7] = tmps[gid].out[7]; - - for (u32 i = 0; i < loop_cnt; i++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 64 * 8; - - dgst[0] = SHA512M_A; - dgst[1] = SHA512M_B; - dgst[2] = SHA512M_C; - dgst[3] = SHA512M_D; - dgst[4] = SHA512M_E; - dgst[5] = SHA512M_F; - dgst[6] = SHA512M_G; - dgst[7] = SHA512M_H; - - sha512_transform 
(w, dgst); - } - - tmps[gid].out[0] = dgst[0]; - tmps[gid].out[1] = dgst[1]; - tmps[gid].out[2] = dgst[2]; - tmps[gid].out[3] = dgst[3]; - tmps[gid].out[4] = dgst[4]; - tmps[gid].out[5] = dgst[5]; - tmps[gid].out[6] = dgst[6]; - tmps[gid].out[7] = dgst[7]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12200_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, ecryptfs_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u64x a = tmps[gid].out[0]; - - const u32x r0 = h32_from_64 (a); - const u32x r1 = l32_from_64 (a); - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12300.cu b/nv/m12300.cu deleted file mode 100644 index 97a3ce5..0000000 --- a/nv/m12300.cu +++ /dev/null @@ -1,530 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _PBKDF2_SHA512_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - 
-#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -__device__ __constant__ u64 k[80] = -{ - SHA512C00, SHA512C01, SHA512C02, SHA512C03, - SHA512C04, SHA512C05, SHA512C06, SHA512C07, - SHA512C08, SHA512C09, SHA512C0a, SHA512C0b, - SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f, - SHA512C10, SHA512C11, SHA512C12, SHA512C13, - SHA512C14, SHA512C15, SHA512C16, SHA512C17, - SHA512C18, SHA512C19, SHA512C1a, SHA512C1b, - SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f, - SHA512C20, SHA512C21, SHA512C22, SHA512C23, - SHA512C24, SHA512C25, SHA512C26, SHA512C27, - SHA512C28, SHA512C29, SHA512C2a, SHA512C2b, - SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f, - SHA512C30, SHA512C31, SHA512C32, SHA512C33, - SHA512C34, SHA512C35, SHA512C36, SHA512C37, - SHA512C38, SHA512C39, SHA512C3a, SHA512C3b, - SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f, - SHA512C40, SHA512C41, SHA512C42, SHA512C43, - SHA512C44, SHA512C45, SHA512C46, SHA512C47, - SHA512C48, SHA512C49, SHA512C4a, SHA512C4b, - SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f, -}; - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, 
k[i + 0]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k[i + 1]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k[i + 2]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k[i + 3]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k[i + 4]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k[i + 5]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k[i + 6]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k[i + 7]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k[i + 8]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k[i + 9]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k[i + 10]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k[i + 11]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k[i + 12]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k[i + 13]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k[i + 14]); \ - SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k[i + 15]); \ -} - -__device__ static void sha512_transform (const u64 w[16], u64 dgst[8]) -{ - u64 a = dgst[0]; - u64 b = dgst[1]; - u64 c = dgst[2]; - u64 d = dgst[3]; - u64 e = dgst[4]; - u64 f = dgst[5]; - u64 g = dgst[6]; - u64 h = dgst[7]; - - u64 w0_t = w[ 0]; - u64 w1_t = w[ 1]; - u64 w2_t = w[ 2]; - u64 w3_t = w[ 3]; - u64 w4_t = w[ 4]; - u64 w5_t = w[ 5]; - u64 w6_t = w[ 6]; - u64 w7_t = w[ 7]; - u64 w8_t = w[ 8]; - u64 w9_t = w[ 9]; - u64 wa_t = w[10]; - u64 wb_t = w[11]; - u64 wc_t = w[12]; - u64 wd_t = w[13]; - u64 we_t = w[14]; - u64 wf_t = w[15]; - - ROUND_STEP (0); - - for (int i = 16; i < 80; i += 16) - { - ROUND_EXPAND (); ROUND_STEP (i); - } - - dgst[0] += a; - dgst[1] += b; - dgst[2] += c; - dgst[3] += d; - dgst[4] += e; - dgst[5] += 
f; - dgst[6] += g; - dgst[7] += h; -} - -__device__ static void hmac_run (const u64 w1[16], const u64 ipad[8], const u64 opad[8], u64 dgst[8]) -{ - dgst[0] = ipad[0]; - dgst[1] = ipad[1]; - dgst[2] = ipad[2]; - dgst[3] = ipad[3]; - dgst[4] = ipad[4]; - dgst[5] = ipad[5]; - dgst[6] = ipad[6]; - dgst[7] = ipad[7]; - - sha512_transform (w1, dgst); - - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - dgst[0] = opad[0]; - dgst[1] = opad[1]; - dgst[2] = opad[2]; - dgst[3] = opad[3]; - dgst[4] = opad[4]; - dgst[5] = opad[5]; - dgst[6] = opad[6]; - dgst[7] = opad[7]; - - sha512_transform (w, dgst); -} - -__device__ static void hmac_init (u64 w[16], u64 ipad[8], u64 opad[8]) -{ - w[ 0] ^= 0x3636363636363636; - w[ 1] ^= 0x3636363636363636; - w[ 2] ^= 0x3636363636363636; - w[ 3] ^= 0x3636363636363636; - w[ 4] ^= 0x3636363636363636; - w[ 5] ^= 0x3636363636363636; - w[ 6] ^= 0x3636363636363636; - w[ 7] ^= 0x3636363636363636; - w[ 8] ^= 0x3636363636363636; - w[ 9] ^= 0x3636363636363636; - w[10] ^= 0x3636363636363636; - w[11] ^= 0x3636363636363636; - w[12] ^= 0x3636363636363636; - w[13] ^= 0x3636363636363636; - w[14] ^= 0x3636363636363636; - w[15] ^= 0x3636363636363636; - - ipad[0] = SHA512M_A; - ipad[1] = SHA512M_B; - ipad[2] = SHA512M_C; - ipad[3] = SHA512M_D; - ipad[4] = SHA512M_E; - ipad[5] = SHA512M_F; - ipad[6] = SHA512M_G; - ipad[7] = SHA512M_H; - - sha512_transform (w, ipad); - - w[ 0] ^= 0x6a6a6a6a6a6a6a6a; - w[ 1] ^= 0x6a6a6a6a6a6a6a6a; - w[ 2] ^= 0x6a6a6a6a6a6a6a6a; - w[ 3] ^= 0x6a6a6a6a6a6a6a6a; - w[ 4] ^= 0x6a6a6a6a6a6a6a6a; - w[ 5] ^= 0x6a6a6a6a6a6a6a6a; - w[ 6] ^= 0x6a6a6a6a6a6a6a6a; - w[ 7] ^= 0x6a6a6a6a6a6a6a6a; - w[ 8] ^= 0x6a6a6a6a6a6a6a6a; - w[ 9] ^= 0x6a6a6a6a6a6a6a6a; - w[10] ^= 0x6a6a6a6a6a6a6a6a; - w[11] ^= 
0x6a6a6a6a6a6a6a6a; - w[12] ^= 0x6a6a6a6a6a6a6a6a; - w[13] ^= 0x6a6a6a6a6a6a6a6a; - w[14] ^= 0x6a6a6a6a6a6a6a6a; - w[15] ^= 0x6a6a6a6a6a6a6a6a; - - opad[0] = SHA512M_A; - opad[1] = SHA512M_B; - opad[2] = SHA512M_C; - opad[3] = SHA512M_D; - opad[4] = SHA512M_E; - opad[5] = SHA512M_F; - opad[6] = SHA512M_G; - opad[7] = SHA512M_H; - - sha512_transform (w, opad); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12300_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, oraclet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = swap_workaround (pws[gid].i[ 0]); - w0[1] = swap_workaround (pws[gid].i[ 1]); - w0[2] = swap_workaround (pws[gid].i[ 2]); - w0[3] = swap_workaround (pws[gid].i[ 3]); - - u32x w1[4]; - - w1[0] = swap_workaround (pws[gid].i[ 4]); - w1[1] = swap_workaround (pws[gid].i[ 5]); - w1[2] = swap_workaround (pws[gid].i[ 6]); - w1[3] = swap_workaround (pws[gid].i[ 7]); - - u32x w2[4]; - - w2[0] = swap_workaround (pws[gid].i[ 8]); - w2[1] = swap_workaround (pws[gid].i[ 9]); - w2[2] = swap_workaround (pws[gid].i[10]); - w2[3] = swap_workaround (pws[gid].i[11]); - - u32x w3[4]; - - w3[0] = swap_workaround (pws[gid].i[12]); - w3[1] = swap_workaround (pws[gid].i[13]); - 
w3[2] = swap_workaround (pws[gid].i[14]); - w3[3] = swap_workaround (pws[gid].i[15]); - - /** - * salt - */ - - u64 data[16]; - - data[ 0] = hl32_to_64 (salt_bufs[salt_pos].salt_buf[0], salt_bufs[salt_pos].salt_buf[1]); - data[ 1] = hl32_to_64 (salt_bufs[salt_pos].salt_buf[2], salt_bufs[salt_pos].salt_buf[3]); - data[ 2] = 0x415554485f50424b; - data[ 3] = 0x4446325f53504545; - data[ 4] = 0x44595f4b45590000; - data[ 5] = 0x0001800000000000; - data[ 6] = 0; - data[ 7] = 0; - data[ 8] = 0; - data[ 9] = 0; - data[10] = 0; - data[11] = 0; - data[12] = 0; - data[13] = 0; - data[14] = 0; - data[15] = (128 + 16 + 22 + 4) * 8; - - u64 w[16]; - - w[ 0] = hl32_to_64 (w0[0], w0[1]); - w[ 1] = hl32_to_64 (w0[2], w0[3]); - w[ 2] = hl32_to_64 (w1[0], w1[1]); - w[ 3] = hl32_to_64 (w1[2], w1[3]); - w[ 4] = hl32_to_64 (w2[0], w2[1]); - w[ 5] = hl32_to_64 (w2[2], w2[3]); - w[ 6] = hl32_to_64 (w3[0], w3[1]); - w[ 7] = hl32_to_64 (w3[2], w3[3]); - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u64 ipad[8]; - u64 opad[8]; - - hmac_init (w, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - u64 dgst[8]; - - hmac_run (data, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - 
tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12300_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, oraclet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u64 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u64 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < 8; i += 8) - { - u64 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; 
- dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u64 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u64 w[16]; - - w[ 0] = dgst[0]; - w[ 1] = dgst[1]; - w[ 2] = dgst[2]; - w[ 3] = dgst[3]; - w[ 4] = dgst[4]; - w[ 5] = dgst[5]; - w[ 6] = dgst[6]; - w[ 7] = dgst[7]; - w[ 8] = 0x8000000000000000; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (128 + 64) * 8; - - hmac_run (w, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12300_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, oraclet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void 
*esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - u64 w[16]; - - w[ 0] = tmps[gid].out[0]; - w[ 1] = tmps[gid].out[1]; - w[ 2] = tmps[gid].out[2]; - w[ 3] = tmps[gid].out[3]; - w[ 4] = tmps[gid].out[4]; - w[ 5] = tmps[gid].out[5]; - w[ 6] = tmps[gid].out[6]; - w[ 7] = tmps[gid].out[7]; - w[ 8] = hl32_to_64 (salt_bufs[salt_pos].salt_buf[0], salt_bufs[salt_pos].salt_buf[1]); - w[ 9] = hl32_to_64 (salt_bufs[salt_pos].salt_buf[2], salt_bufs[salt_pos].salt_buf[3]); - w[10] = 0x8000000000000000; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = (64 + 16) * 8; - - u64 dgst[8]; - - dgst[0] = SHA512M_A; - dgst[1] = SHA512M_B; - dgst[2] = SHA512M_C; - dgst[3] = SHA512M_D; - dgst[4] = SHA512M_E; - dgst[5] = SHA512M_F; - dgst[6] = SHA512M_G; - dgst[7] = SHA512M_H; - - sha512_transform (w, dgst); - - const u32x r0 = h32_from_64 (dgst[0]); - const u32x r1 = l32_from_64 (dgst[0]); - const u32x r2 = h32_from_64 (dgst[1]); - const u32x r3 = l32_from_64 (dgst[1]); - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12400.cu b/nv/m12400.cu deleted file mode 100644 index 63e0c48..0000000 --- a/nv/m12400.cu +++ /dev/null @@ -1,778 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _DES_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define 
VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define PERM_OP(a,b,tt,n,m) \ -{ \ - tt = a >> n; \ - tt = tt ^ b; \ - tt = tt & m; \ - b = b ^ tt; \ - tt = tt << n; \ - a = a ^ tt; \ -} - -#define HPERM_OP(a,tt,n,m) \ -{ \ - tt = a << (16 + n); \ - tt = tt ^ a; \ - tt = tt & m; \ - a = a ^ tt; \ - tt = tt >> (16 + n); \ - a = a ^ tt; \ -} - -#define IP(l,r,tt) \ -{ \ - PERM_OP (r, l, tt, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, tt, 16, 0x0000ffff); \ - PERM_OP (r, l, tt, 2, 0x33333333); \ - PERM_OP (l, r, tt, 8, 0x00ff00ff); \ - PERM_OP (r, l, tt, 1, 0x55555555); \ -} - -#define FP(l,r,tt) \ -{ \ - PERM_OP (l, r, tt, 1, 0x55555555); \ - PERM_OP (r, l, tt, 8, 0x00ff00ff); \ - PERM_OP (l, r, tt, 2, 0x33333333); \ - PERM_OP (r, l, tt, 16, 0x0000ffff); \ - PERM_OP (l, r, tt, 4, 0x0f0f0f0f); \ -} - -__device__ __constant__ u32 c_SPtrans[8][64] = -{ - /* nibble 0 */ - 0x00820200, 0x00020000, 0x80800000, 0x80820200, - 0x00800000, 0x80020200, 0x80020000, 0x80800000, - 0x80020200, 0x00820200, 0x00820000, 0x80000200, - 0x80800200, 0x00800000, 0x00000000, 0x80020000, - 0x00020000, 0x80000000, 0x00800200, 0x00020200, - 0x80820200, 0x00820000, 0x80000200, 0x00800200, - 0x80000000, 0x00000200, 0x00020200, 0x80820000, - 0x00000200, 0x80800200, 0x80820000, 0x00000000, - 0x00000000, 0x80820200, 0x00800200, 0x80020000, - 0x00820200, 0x00020000, 0x80000200, 0x00800200, - 0x80820000, 0x00000200, 0x00020200, 0x80800000, - 0x80020200, 0x80000000, 0x80800000, 0x00820000, - 0x80820200, 0x00020200, 0x00820000, 0x80800200, - 0x00800000, 0x80000200, 0x80020000, 0x00000000, - 0x00020000, 0x00800000, 0x80800200, 0x00820200, - 0x80000000, 0x80820000, 0x00000200, 0x80020200, - /* nibble 1 */ - 0x10042004, 0x00000000, 0x00042000, 0x10040000, - 0x10000004, 0x00002004, 0x10002000, 0x00042000, - 0x00002000, 0x10040004, 0x00000004, 0x10002000, - 0x00040004, 0x10042000, 0x10040000, 0x00000004, - 0x00040000, 0x10002004, 0x10040004, 0x00002000, - 
0x00042004, 0x10000000, 0x00000000, 0x00040004, - 0x10002004, 0x00042004, 0x10042000, 0x10000004, - 0x10000000, 0x00040000, 0x00002004, 0x10042004, - 0x00040004, 0x10042000, 0x10002000, 0x00042004, - 0x10042004, 0x00040004, 0x10000004, 0x00000000, - 0x10000000, 0x00002004, 0x00040000, 0x10040004, - 0x00002000, 0x10000000, 0x00042004, 0x10002004, - 0x10042000, 0x00002000, 0x00000000, 0x10000004, - 0x00000004, 0x10042004, 0x00042000, 0x10040000, - 0x10040004, 0x00040000, 0x00002004, 0x10002000, - 0x10002004, 0x00000004, 0x10040000, 0x00042000, - /* nibble 2 */ - 0x41000000, 0x01010040, 0x00000040, 0x41000040, - 0x40010000, 0x01000000, 0x41000040, 0x00010040, - 0x01000040, 0x00010000, 0x01010000, 0x40000000, - 0x41010040, 0x40000040, 0x40000000, 0x41010000, - 0x00000000, 0x40010000, 0x01010040, 0x00000040, - 0x40000040, 0x41010040, 0x00010000, 0x41000000, - 0x41010000, 0x01000040, 0x40010040, 0x01010000, - 0x00010040, 0x00000000, 0x01000000, 0x40010040, - 0x01010040, 0x00000040, 0x40000000, 0x00010000, - 0x40000040, 0x40010000, 0x01010000, 0x41000040, - 0x00000000, 0x01010040, 0x00010040, 0x41010000, - 0x40010000, 0x01000000, 0x41010040, 0x40000000, - 0x40010040, 0x41000000, 0x01000000, 0x41010040, - 0x00010000, 0x01000040, 0x41000040, 0x00010040, - 0x01000040, 0x00000000, 0x41010000, 0x40000040, - 0x41000000, 0x40010040, 0x00000040, 0x01010000, - /* nibble 3 */ - 0x00100402, 0x04000400, 0x00000002, 0x04100402, - 0x00000000, 0x04100000, 0x04000402, 0x00100002, - 0x04100400, 0x04000002, 0x04000000, 0x00000402, - 0x04000002, 0x00100402, 0x00100000, 0x04000000, - 0x04100002, 0x00100400, 0x00000400, 0x00000002, - 0x00100400, 0x04000402, 0x04100000, 0x00000400, - 0x00000402, 0x00000000, 0x00100002, 0x04100400, - 0x04000400, 0x04100002, 0x04100402, 0x00100000, - 0x04100002, 0x00000402, 0x00100000, 0x04000002, - 0x00100400, 0x04000400, 0x00000002, 0x04100000, - 0x04000402, 0x00000000, 0x00000400, 0x00100002, - 0x00000000, 0x04100002, 0x04100400, 0x00000400, - 0x04000000, 
0x04100402, 0x00100402, 0x00100000, - 0x04100402, 0x00000002, 0x04000400, 0x00100402, - 0x00100002, 0x00100400, 0x04100000, 0x04000402, - 0x00000402, 0x04000000, 0x04000002, 0x04100400, - /* nibble 4 */ - 0x02000000, 0x00004000, 0x00000100, 0x02004108, - 0x02004008, 0x02000100, 0x00004108, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x00004100, - 0x02000108, 0x02004008, 0x02004100, 0x00000000, - 0x00004100, 0x02000000, 0x00004008, 0x00000108, - 0x02000100, 0x00004108, 0x00000000, 0x02000008, - 0x00000008, 0x02000108, 0x02004108, 0x00004008, - 0x02004000, 0x00000100, 0x00000108, 0x02004100, - 0x02004100, 0x02000108, 0x00004008, 0x02004000, - 0x00004000, 0x00000008, 0x02000008, 0x02000100, - 0x02000000, 0x00004100, 0x02004108, 0x00000000, - 0x00004108, 0x02000000, 0x00000100, 0x00004008, - 0x02000108, 0x00000100, 0x00000000, 0x02004108, - 0x02004008, 0x02004100, 0x00000108, 0x00004000, - 0x00004100, 0x02004008, 0x02000100, 0x00000108, - 0x00000008, 0x00004108, 0x02004000, 0x02000008, - /* nibble 5 */ - 0x20000010, 0x00080010, 0x00000000, 0x20080800, - 0x00080010, 0x00000800, 0x20000810, 0x00080000, - 0x00000810, 0x20080810, 0x00080800, 0x20000000, - 0x20000800, 0x20000010, 0x20080000, 0x00080810, - 0x00080000, 0x20000810, 0x20080010, 0x00000000, - 0x00000800, 0x00000010, 0x20080800, 0x20080010, - 0x20080810, 0x20080000, 0x20000000, 0x00000810, - 0x00000010, 0x00080800, 0x00080810, 0x20000800, - 0x00000810, 0x20000000, 0x20000800, 0x00080810, - 0x20080800, 0x00080010, 0x00000000, 0x20000800, - 0x20000000, 0x00000800, 0x20080010, 0x00080000, - 0x00080010, 0x20080810, 0x00080800, 0x00000010, - 0x20080810, 0x00080800, 0x00080000, 0x20000810, - 0x20000010, 0x20080000, 0x00080810, 0x00000000, - 0x00000800, 0x20000010, 0x20000810, 0x20080800, - 0x20080000, 0x00000810, 0x00000010, 0x20080010, - /* nibble 6 */ - 0x00001000, 0x00000080, 0x00400080, 0x00400001, - 0x00401081, 0x00001001, 0x00001080, 0x00000000, - 0x00400000, 0x00400081, 0x00000081, 0x00401000, - 
0x00000001, 0x00401080, 0x00401000, 0x00000081, - 0x00400081, 0x00001000, 0x00001001, 0x00401081, - 0x00000000, 0x00400080, 0x00400001, 0x00001080, - 0x00401001, 0x00001081, 0x00401080, 0x00000001, - 0x00001081, 0x00401001, 0x00000080, 0x00400000, - 0x00001081, 0x00401000, 0x00401001, 0x00000081, - 0x00001000, 0x00000080, 0x00400000, 0x00401001, - 0x00400081, 0x00001081, 0x00001080, 0x00000000, - 0x00000080, 0x00400001, 0x00000001, 0x00400080, - 0x00000000, 0x00400081, 0x00400080, 0x00001080, - 0x00000081, 0x00001000, 0x00401081, 0x00400000, - 0x00401080, 0x00000001, 0x00001001, 0x00401081, - 0x00400001, 0x00401080, 0x00401000, 0x00001001, - /* nibble 7 */ - 0x08200020, 0x08208000, 0x00008020, 0x00000000, - 0x08008000, 0x00200020, 0x08200000, 0x08208020, - 0x00000020, 0x08000000, 0x00208000, 0x00008020, - 0x00208020, 0x08008020, 0x08000020, 0x08200000, - 0x00008000, 0x00208020, 0x00200020, 0x08008000, - 0x08208020, 0x08000020, 0x00000000, 0x00208000, - 0x08000000, 0x00200000, 0x08008020, 0x08200020, - 0x00200000, 0x00008000, 0x08208000, 0x00000020, - 0x00200000, 0x00008000, 0x08000020, 0x08208020, - 0x00008020, 0x08000000, 0x00000000, 0x00208000, - 0x08200020, 0x08008020, 0x08008000, 0x00200020, - 0x08208000, 0x00000020, 0x00200020, 0x08008000, - 0x08208020, 0x00200000, 0x08200000, 0x08000020, - 0x00208000, 0x00008020, 0x08008020, 0x08200000, - 0x00000020, 0x08208000, 0x00208020, 0x00000000, - 0x08000000, 0x08200020, 0x00008000, 0x00208020 -}; - -__device__ __constant__ u32 c_skb[8][64] = -{ - /* for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x00000010, 0x20000000, 0x20000010, - 0x00010000, 0x00010010, 0x20010000, 0x20010010, - 0x00000800, 0x00000810, 0x20000800, 0x20000810, - 0x00010800, 0x00010810, 0x20010800, 0x20010810, - 0x00000020, 0x00000030, 0x20000020, 0x20000030, - 0x00010020, 0x00010030, 0x20010020, 0x20010030, - 0x00000820, 0x00000830, 0x20000820, 0x20000830, - 0x00010820, 0x00010830, 0x20010820, 0x20010830, - 0x00080000, 0x00080010, 
0x20080000, 0x20080010, - 0x00090000, 0x00090010, 0x20090000, 0x20090010, - 0x00080800, 0x00080810, 0x20080800, 0x20080810, - 0x00090800, 0x00090810, 0x20090800, 0x20090810, - 0x00080020, 0x00080030, 0x20080020, 0x20080030, - 0x00090020, 0x00090030, 0x20090020, 0x20090030, - 0x00080820, 0x00080830, 0x20080820, 0x20080830, - 0x00090820, 0x00090830, 0x20090820, 0x20090830, - /* for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 */ - 0x00000000, 0x02000000, 0x00002000, 0x02002000, - 0x00200000, 0x02200000, 0x00202000, 0x02202000, - 0x00000004, 0x02000004, 0x00002004, 0x02002004, - 0x00200004, 0x02200004, 0x00202004, 0x02202004, - 0x00000400, 0x02000400, 0x00002400, 0x02002400, - 0x00200400, 0x02200400, 0x00202400, 0x02202400, - 0x00000404, 0x02000404, 0x00002404, 0x02002404, - 0x00200404, 0x02200404, 0x00202404, 0x02202404, - 0x10000000, 0x12000000, 0x10002000, 0x12002000, - 0x10200000, 0x12200000, 0x10202000, 0x12202000, - 0x10000004, 0x12000004, 0x10002004, 0x12002004, - 0x10200004, 0x12200004, 0x10202004, 0x12202004, - 0x10000400, 0x12000400, 0x10002400, 0x12002400, - 0x10200400, 0x12200400, 0x10202400, 0x12202400, - 0x10000404, 0x12000404, 0x10002404, 0x12002404, - 0x10200404, 0x12200404, 0x10202404, 0x12202404, - /* for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 */ - 0x00000000, 0x00000001, 0x00040000, 0x00040001, - 0x01000000, 0x01000001, 0x01040000, 0x01040001, - 0x00000002, 0x00000003, 0x00040002, 0x00040003, - 0x01000002, 0x01000003, 0x01040002, 0x01040003, - 0x00000200, 0x00000201, 0x00040200, 0x00040201, - 0x01000200, 0x01000201, 0x01040200, 0x01040201, - 0x00000202, 0x00000203, 0x00040202, 0x00040203, - 0x01000202, 0x01000203, 0x01040202, 0x01040203, - 0x08000000, 0x08000001, 0x08040000, 0x08040001, - 0x09000000, 0x09000001, 0x09040000, 0x09040001, - 0x08000002, 0x08000003, 0x08040002, 0x08040003, - 0x09000002, 0x09000003, 0x09040002, 0x09040003, - 0x08000200, 0x08000201, 0x08040200, 0x08040201, - 0x09000200, 0x09000201, 0x09040200, 0x09040201, - 
0x08000202, 0x08000203, 0x08040202, 0x08040203, - 0x09000202, 0x09000203, 0x09040202, 0x09040203, - /* for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 */ - 0x00000000, 0x00100000, 0x00000100, 0x00100100, - 0x00000008, 0x00100008, 0x00000108, 0x00100108, - 0x00001000, 0x00101000, 0x00001100, 0x00101100, - 0x00001008, 0x00101008, 0x00001108, 0x00101108, - 0x04000000, 0x04100000, 0x04000100, 0x04100100, - 0x04000008, 0x04100008, 0x04000108, 0x04100108, - 0x04001000, 0x04101000, 0x04001100, 0x04101100, - 0x04001008, 0x04101008, 0x04001108, 0x04101108, - 0x00020000, 0x00120000, 0x00020100, 0x00120100, - 0x00020008, 0x00120008, 0x00020108, 0x00120108, - 0x00021000, 0x00121000, 0x00021100, 0x00121100, - 0x00021008, 0x00121008, 0x00021108, 0x00121108, - 0x04020000, 0x04120000, 0x04020100, 0x04120100, - 0x04020008, 0x04120008, 0x04020108, 0x04120108, - 0x04021000, 0x04121000, 0x04021100, 0x04121100, - 0x04021008, 0x04121008, 0x04021108, 0x04121108, - /* for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 */ - 0x00000000, 0x10000000, 0x00010000, 0x10010000, - 0x00000004, 0x10000004, 0x00010004, 0x10010004, - 0x20000000, 0x30000000, 0x20010000, 0x30010000, - 0x20000004, 0x30000004, 0x20010004, 0x30010004, - 0x00100000, 0x10100000, 0x00110000, 0x10110000, - 0x00100004, 0x10100004, 0x00110004, 0x10110004, - 0x20100000, 0x30100000, 0x20110000, 0x30110000, - 0x20100004, 0x30100004, 0x20110004, 0x30110004, - 0x00001000, 0x10001000, 0x00011000, 0x10011000, - 0x00001004, 0x10001004, 0x00011004, 0x10011004, - 0x20001000, 0x30001000, 0x20011000, 0x30011000, - 0x20001004, 0x30001004, 0x20011004, 0x30011004, - 0x00101000, 0x10101000, 0x00111000, 0x10111000, - 0x00101004, 0x10101004, 0x00111004, 0x10111004, - 0x20101000, 0x30101000, 0x20111000, 0x30111000, - 0x20101004, 0x30101004, 0x20111004, 0x30111004, - /* for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 */ - 0x00000000, 0x08000000, 0x00000008, 0x08000008, - 0x00000400, 0x08000400, 0x00000408, 0x08000408, - 0x00020000, 
0x08020000, 0x00020008, 0x08020008, - 0x00020400, 0x08020400, 0x00020408, 0x08020408, - 0x00000001, 0x08000001, 0x00000009, 0x08000009, - 0x00000401, 0x08000401, 0x00000409, 0x08000409, - 0x00020001, 0x08020001, 0x00020009, 0x08020009, - 0x00020401, 0x08020401, 0x00020409, 0x08020409, - 0x02000000, 0x0A000000, 0x02000008, 0x0A000008, - 0x02000400, 0x0A000400, 0x02000408, 0x0A000408, - 0x02020000, 0x0A020000, 0x02020008, 0x0A020008, - 0x02020400, 0x0A020400, 0x02020408, 0x0A020408, - 0x02000001, 0x0A000001, 0x02000009, 0x0A000009, - 0x02000401, 0x0A000401, 0x02000409, 0x0A000409, - 0x02020001, 0x0A020001, 0x02020009, 0x0A020009, - 0x02020401, 0x0A020401, 0x02020409, 0x0A020409, - /* for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 */ - 0x00000000, 0x00000100, 0x00080000, 0x00080100, - 0x01000000, 0x01000100, 0x01080000, 0x01080100, - 0x00000010, 0x00000110, 0x00080010, 0x00080110, - 0x01000010, 0x01000110, 0x01080010, 0x01080110, - 0x00200000, 0x00200100, 0x00280000, 0x00280100, - 0x01200000, 0x01200100, 0x01280000, 0x01280100, - 0x00200010, 0x00200110, 0x00280010, 0x00280110, - 0x01200010, 0x01200110, 0x01280010, 0x01280110, - 0x00000200, 0x00000300, 0x00080200, 0x00080300, - 0x01000200, 0x01000300, 0x01080200, 0x01080300, - 0x00000210, 0x00000310, 0x00080210, 0x00080310, - 0x01000210, 0x01000310, 0x01080210, 0x01080310, - 0x00200200, 0x00200300, 0x00280200, 0x00280300, - 0x01200200, 0x01200300, 0x01280200, 0x01280300, - 0x00200210, 0x00200310, 0x00280210, 0x00280310, - 0x01200210, 0x01200310, 0x01280210, 0x01280310, - /* for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 */ - 0x00000000, 0x04000000, 0x00040000, 0x04040000, - 0x00000002, 0x04000002, 0x00040002, 0x04040002, - 0x00002000, 0x04002000, 0x00042000, 0x04042000, - 0x00002002, 0x04002002, 0x00042002, 0x04042002, - 0x00000020, 0x04000020, 0x00040020, 0x04040020, - 0x00000022, 0x04000022, 0x00040022, 0x04040022, - 0x00002020, 0x04002020, 0x00042020, 0x04042020, - 0x00002022, 0x04002022, 0x00042022, 
0x04042022, - 0x00000800, 0x04000800, 0x00040800, 0x04040800, - 0x00000802, 0x04000802, 0x00040802, 0x04040802, - 0x00002800, 0x04002800, 0x00042800, 0x04042800, - 0x00002802, 0x04002802, 0x00042802, 0x04042802, - 0x00000820, 0x04000820, 0x00040820, 0x04040820, - 0x00000822, 0x04000822, 0x00040822, 0x04040822, - 0x00002820, 0x04002820, 0x00042820, 0x04042820, - 0x00002822, 0x04002822, 0x00042822, 0x04042822 -}; - -#ifdef VECT_SIZE1 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i)]) -#endif - -#ifdef VECT_SIZE2 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#endif - -#ifdef VECT_SIZE4 -#define BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#endif - -__device__ static void _des_crypt_keysetup (u32x c, u32x d, u32x Kc[16], u32x Kd[16], u32 s_skb[8][64]) -{ - u32x tt; - - PERM_OP (d, c, tt, 4, 0x0f0f0f0f); - HPERM_OP (c, tt, 2, 0xcccc0000); - HPERM_OP (d, tt, 2, 0xcccc0000); - PERM_OP (d, c, tt, 1, 0x55555555); - PERM_OP (c, d, tt, 8, 0x00ff00ff); - PERM_OP (d, c, tt, 1, 0x55555555); - - d = ((d & 0x000000ff) << 16) - | ((d & 0x0000ff00) << 0) - | ((d & 0x00ff0000) >> 16) - | ((c & 0xf0000000) >> 4); - - c = c & 0x0fffffff; - - #pragma unroll - for (u32 i = 0; i < 16; i++) - { - const u32 shifts3s0[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 }; - const u32 shifts3s1[16] = { 27, 27, 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 27 }; - - c = c >> shifts3s0[i] | c << shifts3s1[i]; - d = d >> shifts3s0[i] | d << shifts3s1[i]; - - c = c & 0x0fffffff; - d = d & 0x0fffffff; - - const u32x c00 = (c >> 0) & 0x0000003f; - const u32x c06 = (c >> 6) & 0x00383003; - const u32x c07 = (c >> 7) & 0x0000003c; - const u32x c13 = (c >> 13) & 0x0000060f; - const u32x c20 = (c >> 20) & 0x00000001; - - u32x s = BOX (((c00 >> 0) & 0xff), 0, s_skb) - | BOX (((c06 >> 0) & 0xff) - |((c07 >> 0) & 0xff), 1, s_skb) - | BOX (((c13 >> 0) & 0xff) - |((c06 >> 8) & 0xff), 2, s_skb) - | BOX (((c20 >> 0) & 0xff) - |((c13 >> 8) & 
0xff) - |((c06 >> 16) & 0xff), 3, s_skb); - - const u32x d00 = (d >> 0) & 0x00003c3f; - const u32x d07 = (d >> 7) & 0x00003f03; - const u32x d21 = (d >> 21) & 0x0000000f; - const u32x d22 = (d >> 22) & 0x00000030; - - u32x t = BOX (((d00 >> 0) & 0xff), 4, s_skb) - | BOX (((d07 >> 0) & 0xff) - |((d00 >> 8) & 0xff), 5, s_skb) - | BOX (((d07 >> 8) & 0xff), 6, s_skb) - | BOX (((d21 >> 0) & 0xff) - |((d22 >> 0) & 0xff), 7, s_skb); - - Kc[i] = ((t << 16) | (s & 0x0000ffff)); - Kd[i] = ((s >> 16) | (t & 0xffff0000)); - } -} - -__device__ static void _des_crypt_encrypt (u32x iv[2], u32 mask, u32 rounds, u32x Kc[16], u32x Kd[16], u32 s_SPtrans[8][64]) -{ - u32x tt; - - const u32 E0 = ((mask >> 0) & 0x003f) - | ((mask >> 4) & 0x3f00); - const u32 E1 = ((mask >> 2) & 0x03f0) - | ((mask >> 6) & 0xf000) - | ((mask >> 22) & 0x0003); - - u32x r = iv[0]; - u32x l = iv[1]; - - for (u32 i = 0; i < rounds; i++) - { - #pragma unroll - for (u32 j = 0; j < 16; j++) - { - /* sbox */ - u32x t = r ^ (r >> 16); - - u32x u = t; - - // u - u = u & E0; - - tt = (u << 16); - - u = u ^ r; - u = u ^ tt; - u = u ^ Kc[j]; - - // t - - t = t & E1; - - tt = (t << 16); - - t = t ^ r; - t = t ^ tt; - t = rotl32 (t, 28u); - t = t ^ Kd[j]; - - const u32x um = u & 0x3f3f3f3f; - const u32x tm = t & 0x3f3f3f3f; - - l ^= BOX (((um >> 0) & 0xff), 0, s_SPtrans) - | BOX (((um >> 8) & 0xff), 2, s_SPtrans) - | BOX (((um >> 16) & 0xff), 4, s_SPtrans) - | BOX (((um >> 24) & 0xff), 6, s_SPtrans) - | BOX (((tm >> 0) & 0xff), 1, s_SPtrans) - | BOX (((tm >> 8) & 0xff), 3, s_SPtrans) - | BOX (((tm >> 16) & 0xff), 5, s_SPtrans) - | BOX (((tm >> 24) & 0xff), 7, s_SPtrans); - - tt = l; - l = r; - r = tt; - } - - tt = l; - l = r; - r = tt; - } - - iv[0] = r; - iv[1] = l; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12400_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 
*bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_skb[8][64]; - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_skb[0][lid] = c_skb[0][lid]; - s_skb[1][lid] = c_skb[1][lid]; - s_skb[2][lid] = c_skb[2][lid]; - s_skb[3][lid] = c_skb[3][lid]; - s_skb[4][lid] = c_skb[4][lid]; - s_skb[5][lid] = c_skb[5][lid]; - s_skb[6][lid] = c_skb[6][lid]; - s_skb[7][lid] = c_skb[7][lid]; - - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * word - */ - - u32x w[16]; - - w[ 0] = pws[gid].i[ 0]; - w[ 1] = pws[gid].i[ 1]; - w[ 2] = pws[gid].i[ 2]; - w[ 3] = pws[gid].i[ 3]; - w[ 4] = pws[gid].i[ 4]; - w[ 5] = pws[gid].i[ 5]; - w[ 6] = pws[gid].i[ 6]; - w[ 7] = pws[gid].i[ 7]; - w[ 8] = pws[gid].i[ 8]; - w[ 9] = pws[gid].i[ 9]; - w[10] = pws[gid].i[10]; - w[11] = pws[gid].i[11]; - w[12] = pws[gid].i[12]; - w[13] = pws[gid].i[13]; - w[14] = pws[gid].i[14]; - w[15] = pws[gid].i[15]; - - u32 pw_len = pws[gid].pw_len; - - u32 tt; - - u32 Kc[16]; - u32 
Kd[16]; - - - u32 out[2]; - - out[0] = (w[0] << 1) & 0xfefefefe; - out[1] = (w[1] << 1) & 0xfefefefe; - - for (u32 i = 8, j = 2; i < pw_len; i += 8, j += 2) - { - _des_crypt_keysetup (out[0], out[1], Kc, Kd, s_skb); - - IP (out[0], out[1], tt); - - out[0] = rotr32 (out[0], 31); - out[1] = rotr32 (out[1], 31); - - _des_crypt_encrypt (out, 0, 1, Kc, Kd, s_SPtrans); - - out[0] = rotl32 (out[0], 31); - out[1] = rotl32 (out[1], 31); - - FP (out[1], out[0], tt); - - const u32 R = (w[j + 0] << 1) & 0xfefefefe; - const u32 L = (w[j + 1] << 1) & 0xfefefefe; - - out[0] ^= R; - out[1] ^= L; - } - - /* - out[0] = (out[0] & 0xfefefefe) >> 1; - out[1] = (out[1] & 0xfefefefe) >> 1; - - out[0] = (out[0] << 1) & 0xfefefefe; - out[1] = (out[1] << 1) & 0xfefefefe; - */ - - _des_crypt_keysetup (out[0], out[1], Kc, Kd, s_skb); - - tmps[gid].Kc[ 0] = Kc[ 0]; - tmps[gid].Kc[ 1] = Kc[ 1]; - tmps[gid].Kc[ 2] = Kc[ 2]; - tmps[gid].Kc[ 3] = Kc[ 3]; - tmps[gid].Kc[ 4] = Kc[ 4]; - tmps[gid].Kc[ 5] = Kc[ 5]; - tmps[gid].Kc[ 6] = Kc[ 6]; - tmps[gid].Kc[ 7] = Kc[ 7]; - tmps[gid].Kc[ 8] = Kc[ 8]; - tmps[gid].Kc[ 9] = Kc[ 9]; - tmps[gid].Kc[10] = Kc[10]; - tmps[gid].Kc[11] = Kc[11]; - tmps[gid].Kc[12] = Kc[12]; - tmps[gid].Kc[13] = Kc[13]; - tmps[gid].Kc[14] = Kc[14]; - tmps[gid].Kc[15] = Kc[15]; - - tmps[gid].Kd[ 0] = Kd[ 0]; - tmps[gid].Kd[ 1] = Kd[ 1]; - tmps[gid].Kd[ 2] = Kd[ 2]; - tmps[gid].Kd[ 3] = Kd[ 3]; - tmps[gid].Kd[ 4] = Kd[ 4]; - tmps[gid].Kd[ 5] = Kd[ 5]; - tmps[gid].Kd[ 6] = Kd[ 6]; - tmps[gid].Kd[ 7] = Kd[ 7]; - tmps[gid].Kd[ 8] = Kd[ 8]; - tmps[gid].Kd[ 9] = Kd[ 9]; - tmps[gid].Kd[10] = Kd[10]; - tmps[gid].Kd[11] = Kd[11]; - tmps[gid].Kd[12] = Kd[12]; - tmps[gid].Kd[13] = Kd[13]; - tmps[gid].Kd[14] = Kd[14]; - tmps[gid].Kd[15] = Kd[15]; - - tmps[gid].iv[0] = 0; - tmps[gid].iv[1] = 0; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12400_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * sbox - */ - - __shared__ u32 s_SPtrans[8][64]; - - if (lid < 64) - { - s_SPtrans[0][lid] = c_SPtrans[0][lid]; - s_SPtrans[1][lid] = c_SPtrans[1][lid]; - s_SPtrans[2][lid] = c_SPtrans[2][lid]; - s_SPtrans[3][lid] = c_SPtrans[3][lid]; - s_SPtrans[4][lid] = c_SPtrans[4][lid]; - s_SPtrans[5][lid] = c_SPtrans[5][lid]; - s_SPtrans[6][lid] = c_SPtrans[6][lid]; - s_SPtrans[7][lid] = c_SPtrans[7][lid]; - } - - __syncthreads (); - - if (gid >= gid_max) return; - - u32 Kc[16]; - - Kc[ 0] = tmps[gid].Kc[ 0]; - Kc[ 1] = tmps[gid].Kc[ 1]; - Kc[ 2] = tmps[gid].Kc[ 2]; - Kc[ 3] = tmps[gid].Kc[ 3]; - Kc[ 4] = tmps[gid].Kc[ 4]; - Kc[ 5] = tmps[gid].Kc[ 5]; - Kc[ 6] = tmps[gid].Kc[ 6]; - Kc[ 7] = tmps[gid].Kc[ 7]; - Kc[ 8] = tmps[gid].Kc[ 8]; - Kc[ 9] = tmps[gid].Kc[ 9]; - Kc[10] = tmps[gid].Kc[10]; - Kc[11] = tmps[gid].Kc[11]; - Kc[12] = tmps[gid].Kc[12]; - Kc[13] = tmps[gid].Kc[13]; - Kc[14] = tmps[gid].Kc[14]; - Kc[15] = tmps[gid].Kc[15]; - - u32 Kd[16]; - - Kd[ 0] = tmps[gid].Kd[ 0]; - Kd[ 1] = tmps[gid].Kd[ 1]; - Kd[ 2] = tmps[gid].Kd[ 2]; - Kd[ 3] = tmps[gid].Kd[ 3]; - Kd[ 4] = tmps[gid].Kd[ 4]; - Kd[ 5] = tmps[gid].Kd[ 5]; - Kd[ 6] = tmps[gid].Kd[ 6]; - Kd[ 7] = tmps[gid].Kd[ 7]; - Kd[ 8] = tmps[gid].Kd[ 8]; - Kd[ 9] = 
tmps[gid].Kd[ 9]; - Kd[10] = tmps[gid].Kd[10]; - Kd[11] = tmps[gid].Kd[11]; - Kd[12] = tmps[gid].Kd[12]; - Kd[13] = tmps[gid].Kd[13]; - Kd[14] = tmps[gid].Kd[14]; - Kd[15] = tmps[gid].Kd[15]; - - u32 iv[2]; - - iv[0] = tmps[gid].iv[0]; - iv[1] = tmps[gid].iv[1]; - - const u32 mask = salt_bufs[salt_pos].salt_buf[0]; - - _des_crypt_encrypt (iv, mask, loop_cnt, Kc, Kd, s_SPtrans); - - tmps[gid].Kc[ 0] = Kc[ 0]; - tmps[gid].Kc[ 1] = Kc[ 1]; - tmps[gid].Kc[ 2] = Kc[ 2]; - tmps[gid].Kc[ 3] = Kc[ 3]; - tmps[gid].Kc[ 4] = Kc[ 4]; - tmps[gid].Kc[ 5] = Kc[ 5]; - tmps[gid].Kc[ 6] = Kc[ 6]; - tmps[gid].Kc[ 7] = Kc[ 7]; - tmps[gid].Kc[ 8] = Kc[ 8]; - tmps[gid].Kc[ 9] = Kc[ 9]; - tmps[gid].Kc[10] = Kc[10]; - tmps[gid].Kc[11] = Kc[11]; - tmps[gid].Kc[12] = Kc[12]; - tmps[gid].Kc[13] = Kc[13]; - tmps[gid].Kc[14] = Kc[14]; - tmps[gid].Kc[15] = Kc[15]; - - tmps[gid].Kd[ 0] = Kd[ 0]; - tmps[gid].Kd[ 1] = Kd[ 1]; - tmps[gid].Kd[ 2] = Kd[ 2]; - tmps[gid].Kd[ 3] = Kd[ 3]; - tmps[gid].Kd[ 4] = Kd[ 4]; - tmps[gid].Kd[ 5] = Kd[ 5]; - tmps[gid].Kd[ 6] = Kd[ 6]; - tmps[gid].Kd[ 7] = Kd[ 7]; - tmps[gid].Kd[ 8] = Kd[ 8]; - tmps[gid].Kd[ 9] = Kd[ 9]; - tmps[gid].Kd[10] = Kd[10]; - tmps[gid].Kd[11] = Kd[11]; - tmps[gid].Kd[12] = Kd[12]; - tmps[gid].Kd[13] = Kd[13]; - tmps[gid].Kd[14] = Kd[14]; - tmps[gid].Kd[15] = Kd[15]; - - tmps[gid].iv[0] = iv[0]; - tmps[gid].iv[1] = iv[1]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12400_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, bsdicrypt_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].iv[0]; - const u32x r1 = tmps[gid].iv[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12500.cu b/nv/m12500.cu deleted file mode 100644 index 1252449..0000000 --- a/nv/m12500.cu +++ /dev/null @@ -1,1307 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _RAR3_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#define ROUNDS 0x40000 - -#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#define GETCHAR(a,p) ((u8 *)(a))[(p)] - -#define PUTCHAR_BE(a,p,c) ((u8 *)(a))[(p) ^ 3] = (u8) (c) -#define GETCHAR_BE(a,p) ((u8 *)(a))[(p) ^ 3] - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 
0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 
0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 
0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 
0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 
0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - -__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 
0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 
0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 
0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 
0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 
0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 
0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 
0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 
0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 
0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 
0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 
0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 
0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES128_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - - #pragma unroll 10 - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = rek[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - rek[j + 4] = rek[j + 0] - ^ temp - ^ rcon[i]; - - rek[j + 5] = rek[j + 1] ^ rek[j + 4]; - rek[j + 6] = rek[j + 2] ^ rek[j + 5]; - rek[j + 7] = rek[j + 3] ^ rek[j + 6]; - } -} - -__device__ static void AES128_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 10; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - 
s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES128_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ 
s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = 
s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[40]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[41]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[42]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[43]; -} - -__device__ static void sha1_transform (const u32x w[16], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w[ 0]; - u32x w1_t = w[ 1]; - u32x w2_t = w[ 2]; - u32x w3_t = w[ 3]; - u32x w4_t = w[ 4]; - u32x w5_t = w[ 5]; - u32x w6_t = w[ 6]; - u32x w7_t = w[ 7]; - u32x w8_t = w[ 8]; - u32x w9_t = w[ 9]; - u32x wa_t = w[10]; - u32x wb_t = w[11]; - u32x wc_t = w[12]; - u32x wd_t = w[13]; - u32x we_t = w[14]; - u32x wf_t = w[15]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - 
SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, 
E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP 
(SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP 
(SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12500_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - tmps[gid].dgst[0][0] = SHA1M_A; - tmps[gid].dgst[0][1] = SHA1M_B; - tmps[gid].dgst[0][2] = SHA1M_C; - tmps[gid].dgst[0][3] = SHA1M_D; - tmps[gid].dgst[0][4] = SHA1M_E; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12500_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, 
const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[5]; - - pw_buf[0] = pws[gid].i[0]; - pw_buf[1] = pws[gid].i[1]; - pw_buf[2] = pws[gid].i[2]; - pw_buf[3] = pws[gid].i[3]; - pw_buf[4] = pws[gid].i[4]; - - const u32 pw_len = pws[gid].pw_len; - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 salt_len = 8; - - // this is large enough to hold all possible w[] arrays for 64 iterations - - #define LARGEBLOCK_ELEMS ((40 + 8 + 3) * 16) - - u32 largeblock[LARGEBLOCK_ELEMS]; - - for (u32 i = 0; i < LARGEBLOCK_ELEMS; i++) largeblock[i] = 0; - - for (u32 i = 0, p = 0; i < 64; i++) - { - for (u32 j = 0; j < pw_len; j++, p += 2) - { - PUTCHAR_BE (largeblock, p, GETCHAR (pw_buf, j)); - } - - for (u32 j = 0; j < salt_len; j++, p += 1) - { - PUTCHAR_BE (largeblock, p, GETCHAR (salt_buf, j)); - } - - PUTCHAR_BE (largeblock, p + 2, (loop_pos >> 16) & 0xff); - - p += 3; - } - - const u32 p3 = (pw_len * 2) + salt_len + 3; - - const u32 init_pos = loop_pos / (ROUNDS / 16); - - u32 dgst[5]; - - dgst[0] = tmps[gid].dgst[init_pos][0]; - dgst[1] = tmps[gid].dgst[init_pos][1]; - dgst[2] = tmps[gid].dgst[init_pos][2]; - dgst[3] = tmps[gid].dgst[init_pos][3]; - dgst[4] = tmps[gid].dgst[init_pos][4]; - - u32 iter = loop_pos; - - for 
(u32 i = 0; i < 256; i += 4) - { - for (u32 j = 0; j < 64; j++) - { - const u32 p = ((j + 1) * p3) - 2; - - PUTCHAR_BE (largeblock, p, iter >> 8); - } - - for (u32 k = 0; k < 4; k++) - { - for (u32 j = 0; j < 64; j++) - { - const u32 p = ((j + 1) * p3) - 3; - - PUTCHAR_BE (largeblock, p, iter >> 0); - - iter++; - } - - for (u32 j = 0; j < p3; j++) - { - const u32 j16 = j * 16; - - sha1_transform (&largeblock[j16], dgst); - } - } - } - - tmps[gid].dgst[init_pos + 1][0] = dgst[0]; - tmps[gid].dgst[init_pos + 1][1] = dgst[1]; - tmps[gid].dgst[init_pos + 1][2] = dgst[2]; - tmps[gid].dgst[init_pos + 1][3] = dgst[3]; - tmps[gid].dgst[init_pos + 1][4] = dgst[4]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12500_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, rar3_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = 
td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - const u32 pw_len = pws[gid].pw_len; - - const u32 salt_len = 8; - - const u32 p3 = (pw_len * 2) + salt_len + 3; - - u32x w_buf[16]; - - w_buf[ 0] = 0x80000000; - w_buf[ 1] = 0; - w_buf[ 2] = 0; - w_buf[ 3] = 0; - w_buf[ 4] = 0; - w_buf[ 5] = 0; - w_buf[ 6] = 0; - w_buf[ 7] = 0; - w_buf[ 8] = 0; - w_buf[ 9] = 0; - w_buf[10] = 0; - w_buf[11] = 0; - w_buf[12] = 0; - w_buf[13] = 0; - w_buf[14] = 0; - w_buf[15] = (p3 * ROUNDS) * 8; - - u32 dgst[5]; - - dgst[0] = tmps[gid].dgst[16][0]; - dgst[1] = tmps[gid].dgst[16][1]; - dgst[2] = tmps[gid].dgst[16][2]; - dgst[3] = tmps[gid].dgst[16][3]; - dgst[4] = tmps[gid].dgst[16][4]; - - sha1_transform (w_buf, dgst); - - u32x rk[60]; - - u32 data[4]; - - data[0] = salt_bufs[salt_pos].salt_buf[2]; - data[1] = salt_bufs[salt_pos].salt_buf[3]; - data[2] = salt_bufs[salt_pos].salt_buf[4]; - data[3] = salt_bufs[salt_pos].salt_buf[5]; - - u32x ukeyx[4]; - - ukeyx[0] = swap_workaround (dgst[0]); - ukeyx[1] = swap_workaround (dgst[1]); - ukeyx[2] = swap_workaround (dgst[2]); - ukeyx[3] = swap_workaround (dgst[3]); - - AES128_ExpandKey (ukeyx, rk, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES128_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - AES128_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4); - - u32 iv[4]; - - iv[0] = 0; - iv[1] = 0; - iv[2] = 0; - iv[3] = 0; - - for (int i = 0; i < 16; i++) - { - u32 pw_buf[5]; - - pw_buf[0] = pws[gid].i[0]; - pw_buf[1] = pws[gid].i[1]; - pw_buf[2] = pws[gid].i[2]; - pw_buf[3] = pws[gid].i[3]; - pw_buf[4] = pws[gid].i[4]; - - const u32 pw_len = pws[gid].pw_len; - - u32 salt_buf[2]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - - const u32 
salt_len = 8; - - const u32 p3 = (pw_len * 2) + salt_len + 3; - - u32x w[16]; - - w[ 0] = 0; - w[ 1] = 0; - w[ 2] = 0; - w[ 3] = 0; - w[ 4] = 0; - w[ 5] = 0; - w[ 6] = 0; - w[ 7] = 0; - w[ 8] = 0; - w[ 9] = 0; - w[10] = 0; - w[11] = 0; - w[12] = 0; - w[13] = 0; - w[14] = 0; - w[15] = 0; - - u32 p = 0; - - for (u32 j = 0; j < pw_len; j++, p += 2) - { - PUTCHAR_BE (w, p, GETCHAR (pw_buf, j)); - } - - for (u32 j = 0; j < salt_len; j++, p += 1) - { - PUTCHAR_BE (w, p, GETCHAR (salt_buf, j)); - } - - const u32 iter_pos = i * (ROUNDS / 16); - - PUTCHAR_BE (w, p + 0, (iter_pos >> 0) & 0xff); - PUTCHAR_BE (w, p + 1, (iter_pos >> 8) & 0xff); - PUTCHAR_BE (w, p + 2, (iter_pos >> 16) & 0xff); - - PUTCHAR_BE (w, p3, 0x80); - - w[15] = ((iter_pos + 1) * p3) * 8; - - u32 dgst[5]; - - dgst[0] = tmps[gid].dgst[i][0]; - dgst[1] = tmps[gid].dgst[i][1]; - dgst[2] = tmps[gid].dgst[i][2]; - dgst[3] = tmps[gid].dgst[i][3]; - dgst[4] = tmps[gid].dgst[i][4]; - - sha1_transform (w, dgst); - - PUTCHAR (iv, i, dgst[4] & 0xff); - } - - out[0] ^= swap_workaround (iv[0]); - out[1] ^= swap_workaround (iv[1]); - out[2] ^= swap_workaround (iv[2]); - out[3] ^= swap_workaround (iv[3]); - - const u32x r0 = out[0]; - const u32x r1 = out[1]; - const u32x r2 = 0; - const u32x r3 = 0; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/m12600_a0.cu b/nv/m12600_a0.cu deleted file mode 100644 index 9190123..0000000 --- a/nv/m12600_a0.cu +++ /dev/null @@ -1,797 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" -#include "include/rp_gpu.h" -#include "rp_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S 
"check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ gpu_rule_t c_rules[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - 
pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = 
SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); 
SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); 
SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); 
SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - - w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 
8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND 
(we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, 
g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t 
*combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x pw_buf0[4]; - - pw_buf0[0] = pws[gid].i[ 0]; - pw_buf0[1] = pws[gid].i[ 1]; - pw_buf0[2] = pws[gid].i[ 2]; - pw_buf0[3] = pws[gid].i[ 3]; - - u32x pw_buf1[4]; - - pw_buf1[0] = pws[gid].i[ 4]; - pw_buf1[1] = pws[gid].i[ 5]; - pw_buf1[2] = pws[gid].i[ 6]; - pw_buf1[3] = pws[gid].i[ 7]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - 
digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < rules_cnt; il_pos++) - { - u32x w0[4]; - - w0[0] = pw_buf0[0]; - w0[1] = pw_buf0[1]; - w0[2] = pw_buf0[2]; - w0[3] = pw_buf0[3]; - - u32x w1[4]; - - w1[0] = pw_buf1[0]; - w1[1] = pw_buf1[1]; - w1[2] = pw_buf1[2]; - w1[3] = pw_buf1[3]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 out_len = apply_rules (c_rules[il_pos].cmds, w0, w1, pw_len); - - append_0x80_2 (w0, w1, out_len); - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = out_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, 
c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, 
d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, 
a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, 
c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - - w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, 
SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, 
SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND 
(w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, 
a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 
*d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m12600_a1.cu b/nv/m12600_a1.cu deleted file mode 100644 index 80e66b5..0000000 --- a/nv/m12600_a1.cu +++ /dev/null @@ -1,907 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" 
-#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ comb_t c_combs[1024]; - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 
0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if (combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] 
= wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = 
rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 
((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - 
#undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - 
- w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, 
b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = 
SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, 
w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) 
m12600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 combs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - u32x wordl0[4]; - - wordl0[0] = pws[gid].i[ 0]; - wordl0[1] = pws[gid].i[ 1]; - wordl0[2] = pws[gid].i[ 2]; - wordl0[3] = pws[gid].i[ 3]; - - u32x wordl1[4]; - - wordl1[0] = pws[gid].i[ 4]; - wordl1[1] = pws[gid].i[ 5]; - wordl1[2] = pws[gid].i[ 6]; - 
wordl1[3] = pws[gid].i[ 7]; - - u32x wordl2[4]; - - wordl2[0] = 0; - wordl2[1] = 0; - wordl2[2] = 0; - wordl2[3] = 0; - - u32x wordl3[4]; - - wordl3[0] = 0; - wordl3[1] = 0; - wordl3[2] = 0; - wordl3[3] = 0; - - const u32 pw_l_len = pws[gid].pw_len; - - if (combs_mode == COMBINATOR_MODE_BASE_RIGHT) - { - append_0x80_2 (wordl0, wordl1, pw_l_len); - - switch_buffer_by_offset (wordl0, wordl1, wordl2, wordl3, c_combs[0].pw_len); - } - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - for (u32 il_pos = 0; il_pos < combs_cnt; il_pos++) - { - const u32 pw_r_len = c_combs[il_pos].pw_len; - - const u32 pw_len = pw_l_len + pw_r_len; - - u32 wordr0[4]; - - wordr0[0] = c_combs[il_pos].i[0]; - wordr0[1] = c_combs[il_pos].i[1]; - wordr0[2] = c_combs[il_pos].i[2]; - wordr0[3] = c_combs[il_pos].i[3]; - - u32 wordr1[4]; - - wordr1[0] = c_combs[il_pos].i[4]; - wordr1[1] = c_combs[il_pos].i[5]; - wordr1[2] = c_combs[il_pos].i[6]; - wordr1[3] = c_combs[il_pos].i[7]; - - u32 wordr2[4]; - - wordr2[0] = 0; - wordr2[1] = 0; - wordr2[2] = 0; - wordr2[3] = 0; - - u32 wordr3[4]; - - wordr3[0] = 0; - wordr3[1] = 0; - wordr3[2] = 0; - wordr3[3] = 0; - - if 
(combs_mode == COMBINATOR_MODE_BASE_LEFT) - { - append_0x80_2 (wordr0, wordr1, pw_r_len); - - switch_buffer_by_offset (wordr0, wordr1, wordr2, wordr3, pw_l_len); - } - - u32x w0[4]; - - w0[0] = wordl0[0] | wordr0[0]; - w0[1] = wordl0[1] | wordr0[1]; - w0[2] = wordl0[2] | wordr0[2]; - w0[3] = wordl0[3] | wordr0[3]; - - u32x w1[4]; - - w1[0] = wordl1[0] | wordr1[0]; - w1[1] = wordl1[1] | wordr1[1]; - w1[2] = wordl1[2] | wordr1[2]; - w1[3] = wordl1[3] | wordr1[3]; - - u32x w2[4]; - - w2[0] = wordl2[0] | wordr2[0]; - w2[1] = wordl2[1] | wordr2[1]; - w2[2] = wordl2[2] | wordr2[2]; - w2[3] = wordl2[3] | wordr2[3]; - - u32x w3[4]; - - w3[0] = wordl3[0] | wordr3[0]; - w3[1] = wordl3[1] | wordr3[1]; - w3[2] = 0; - w3[3] = 0; - - /** - * sha1 - */ - - u32x w0_t = swap_workaround (w0[0]); - u32x w1_t = swap_workaround (w0[1]); - u32x w2_t = swap_workaround (w0[2]); - u32x w3_t = swap_workaround (w0[3]); - u32x w4_t = swap_workaround (w1[0]); - u32x w5_t = swap_workaround (w1[1]); - u32x w6_t = swap_workaround (w1[2]); - u32x w7_t = swap_workaround (w1[3]); - u32x w8_t = swap_workaround (w2[0]); - u32x w9_t = swap_workaround (w2[1]); - u32x wa_t = swap_workaround (w2[2]); - u32x wb_t = swap_workaround (w2[3]); - u32x wc_t = swap_workaround (w3[0]); - u32x wd_t = swap_workaround (w3[1]); - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, 
d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 
1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ 
w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 
1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - - w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, 
SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, 
e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, 
w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, 
w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const 
digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ -} diff --git a/nv/m12600_a3.cu b/nv/m12600_a3.cu deleted file mode 100644 index 1617a8a..0000000 --- a/nv/m12600_a3.cu +++ /dev/null @@ -1,1036 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA256_SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 3 -#define DGST_R1 7 -#define DGST_R2 2 -#define DGST_R3 6 - -#include "include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_S "check_single_vect1_comp4.c" -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_S "check_single_vect2_comp4.c" -#define 
VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - -#ifdef VECT_SIZE4 -#define VECT_COMPARE_S "check_single_vect4_comp4.c" -#define VECT_COMPARE_M "check_multi_vect4_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_upper8(i) l_bin2asc[(i)] -#endif - -#ifdef VECT_SIZE2 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y]) -#endif - -#ifdef VECT_SIZE4 -#define uint_to_hex_upper8(i) u32x (l_bin2asc[(i).x], l_bin2asc[(i).y], l_bin2asc[(i).z], l_bin2asc[(i).w]) -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ __constant__ bf_t c_bfs[1024]; - -__device__ static void m12600m (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; - pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - 
pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, 
b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, 
d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, 
a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - - w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; 
- w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, 
SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); 
SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, 
SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_M - } -} - -__device__ static void m12600s (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 pw_len, const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset) -{ - /** - * modifier - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * salt - */ - - u32 pc256[8]; - - pc256[0] = salt_bufs[salt_pos].salt_buf_pc[0]; - pc256[1] = salt_bufs[salt_pos].salt_buf_pc[1]; - pc256[2] = salt_bufs[salt_pos].salt_buf_pc[2]; - pc256[3] = salt_bufs[salt_pos].salt_buf_pc[3]; 
- pc256[4] = salt_bufs[salt_pos].salt_buf_pc[4]; - pc256[5] = salt_bufs[salt_pos].salt_buf_pc[5]; - pc256[6] = salt_bufs[salt_pos].salt_buf_pc[6]; - pc256[7] = salt_bufs[salt_pos].salt_buf_pc[7]; - - /** - * digest - */ - - const u32 search[4] = - { - digests_buf[digests_offset].digest_buf[DGST_R0], - digests_buf[digests_offset].digest_buf[DGST_R1], - digests_buf[digests_offset].digest_buf[DGST_R2], - digests_buf[digests_offset].digest_buf[DGST_R3] - }; - - /** - * loop - */ - - u32x w0l = w0[0]; - - for (u32 il_pos = 0; il_pos < bfs_cnt; il_pos++) - { - const u32 w0r = c_bfs[il_pos].i; - - w0[0] = w0l | w0r; - - /** - * sha1 - */ - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = 0; - u32x wf_t = pw_len * 8; - - u32x a = SHA1M_A; - u32x b = SHA1M_B; - u32x c = SHA1M_C; - u32x d = SHA1M_D; - u32x e = SHA1M_E; - u32x f = 0; - u32x g = 0; - u32x h = 0; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w0_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w1_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w2_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w3_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w4_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, w5_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, w6_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, w7_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, w8_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, w9_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wa_t); - SHA1_STEP (SHA1_F0o, e, a, b, c, d, wb_t); - SHA1_STEP (SHA1_F0o, d, e, a, b, c, wc_t); - SHA1_STEP (SHA1_F0o, c, d, e, a, b, wd_t); - SHA1_STEP (SHA1_F0o, b, c, d, e, a, we_t); - SHA1_STEP (SHA1_F0o, a, b, c, d, e, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, e, a, b, c, d, w0_t); - w1_t = rotl32 ((we_t ^ 
w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, d, e, a, b, c, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, c, d, e, a, b, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, b, c, d, e, a, w3_t); - - #undef K - #define K SHA1C01 - - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ 
w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, a, b, c, d, e, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, e, a, b, c, d, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, d, e, a, b, c, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, c, d, e, a, b, wa_t); - wb_t = rotl32 ((w8_t ^ 
w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, b, c, d, e, a, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, a, b, c, d, e, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, e, a, b, c, d, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, d, e, a, b, c, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, c, d, e, a, b, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, b, c, d, e, a, wf_t); - - a += SHA1M_A; - b += SHA1M_B; - 
c += SHA1M_C; - d += SHA1M_D; - e += SHA1M_E; - - /** - * sha256 - */ - - w0_t = uint_to_hex_upper8 ((a >> 24) & 255) << 0 - | uint_to_hex_upper8 ((a >> 16) & 255) << 16; - w1_t = uint_to_hex_upper8 ((a >> 8) & 255) << 0 - | uint_to_hex_upper8 ((a >> 0) & 255) << 16; - w2_t = uint_to_hex_upper8 ((b >> 24) & 255) << 0 - | uint_to_hex_upper8 ((b >> 16) & 255) << 16; - w3_t = uint_to_hex_upper8 ((b >> 8) & 255) << 0 - | uint_to_hex_upper8 ((b >> 0) & 255) << 16; - w4_t = uint_to_hex_upper8 ((c >> 24) & 255) << 0 - | uint_to_hex_upper8 ((c >> 16) & 255) << 16; - w5_t = uint_to_hex_upper8 ((c >> 8) & 255) << 0 - | uint_to_hex_upper8 ((c >> 0) & 255) << 16; - w6_t = uint_to_hex_upper8 ((d >> 24) & 255) << 0 - | uint_to_hex_upper8 ((d >> 16) & 255) << 16; - w7_t = uint_to_hex_upper8 ((d >> 8) & 255) << 0 - | uint_to_hex_upper8 ((d >> 0) & 255) << 16; - w8_t = uint_to_hex_upper8 ((e >> 24) & 255) << 0 - | uint_to_hex_upper8 ((e >> 16) & 255) << 16; - w9_t = uint_to_hex_upper8 ((e >> 8) & 255) << 0 - | uint_to_hex_upper8 ((e >> 0) & 255) << 16; - - w0_t = swap_workaround (w0_t); - w1_t = swap_workaround (w1_t); - w2_t = swap_workaround (w2_t); - w3_t = swap_workaround (w3_t); - w4_t = swap_workaround (w4_t); - w5_t = swap_workaround (w5_t); - w6_t = swap_workaround (w6_t); - w7_t = swap_workaround (w7_t); - w8_t = swap_workaround (w8_t); - w9_t = swap_workaround (w9_t); - wa_t = 0x80000000; - wb_t = 0; - wc_t = 0; - wd_t = 0; - we_t = 0; - wf_t = (64 + 40) * 8; - - a = pc256[0]; - b = pc256[1]; - c = pc256[2]; - d = pc256[3]; - e = pc256[4]; - f = pc256[5]; - g = pc256[6]; - h = pc256[7]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, 
w4_t, SHA256C04); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP 
(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = 
SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, 
b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - const u32x r0 = d; - const u32x r1 = h; - const u32x r2 = c; - const u32x r3 = g; - - #include VECT_COMPARE_S - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const 
u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) 
-{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_m16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, 
const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s04 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, 
const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s08 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 
salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12600_s16 (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, const void *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 
bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 bfs_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - /** - * modifier - */ - - const u32 lid = threadIdx.x; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - const u32 pw_len = pws[gid].pw_len; - - /** - * bin2asc table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * main - */ - - m12600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_scryptV_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, bfs_cnt, digests_cnt, digests_offset); -} diff --git a/nv/m12700.cu b/nv/m12700.cu deleted file mode 100644 index a7c2a3a..0000000 --- a/nv/m12700.cu +++ /dev/null @@ -1,1557 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _SHA1_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include 
"include/kernel_functions.c" -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE2 -#define VECT_COMPARE_M "check_multi_vect2_comp4.c" -#endif - - -__device__ __constant__ u32 te0[256] = -{ - 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, - 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, - 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, - 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, - 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, - 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, - 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, - 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, - 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, - 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, - 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, - 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, - 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, - 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, - 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, - 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, - 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, - 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, - 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, - 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, - 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, - 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, - 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, - 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, - 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, - 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, - 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, - 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, - 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, - 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, - 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, - 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, - 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, - 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, - 0x93c4c457, 
0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, - 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, - 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, - 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, - 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, - 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, - 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, - 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, - 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, - 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, - 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, - 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, - 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, - 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, - 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, - 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, - 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, - 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, - 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, - 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, - 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, - 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, - 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, - 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, - 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, - 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, - 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, - 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, - 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, - 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a, -}; - -__device__ __constant__ u32 te1[256] = -{ - 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, - 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, - 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, - 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, - 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, - 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, - 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, - 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, - 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, - 0x5a6c3636, 
0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, - 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, - 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, - 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, - 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, - 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, - 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, - 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, - 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, - 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, - 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, - 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, - 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, - 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, - 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, - 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, - 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, - 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, - 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, - 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, - 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, - 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, - 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, - 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, - 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, - 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, - 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, - 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, - 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, - 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, - 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, - 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, - 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, - 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, - 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, - 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, - 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, - 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, - 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, - 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, - 0x24381c1c, 
0xf157a6a6, 0xc773b4b4, 0x5197c6c6, - 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, - 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, - 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, - 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, - 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, - 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, - 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, - 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, - 0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, - 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, - 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, - 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, - 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, - 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, -}; - -__device__ __constant__ u32 te2[256] = -{ - 0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, - 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5, - 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, - 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, - 0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, - 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, - 0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, - 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0, - 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, - 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, - 0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, - 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, - 0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, - 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a, - 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, - 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, - 0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, - 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, - 0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, - 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384, - 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, - 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, - 0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, - 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, - 0xd06bbbd0, 
0xef2ac5ef, 0xaae54faa, 0xfb16edfb, - 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185, - 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, - 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, - 0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, - 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, - 0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, - 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2, - 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, - 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, - 0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, - 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, - 0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, - 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88, - 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, - 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, - 0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, - 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, - 0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, - 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279, - 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, - 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, - 0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, - 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, - 0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, - 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6, - 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, - 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, - 0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, - 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, - 0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, - 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e, - 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, - 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, - 0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, - 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, - 0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, - 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068, - 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, - 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16, -}; - 
-__device__ __constant__ u32 te3[256] = -{ - 0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, - 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491, - 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, - 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, - 0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, - 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, - 0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, - 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b, - 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, - 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, - 0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, - 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, - 0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, - 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f, - 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, - 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, - 0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, - 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, - 0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, - 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713, - 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, - 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, - 0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, - 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, - 0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, - 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411, - 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, - 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, - 0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, - 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, - 0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, - 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf, - 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, - 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, - 0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, - 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, - 0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, - 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b, - 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, - 
0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, - 0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, - 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, - 0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, - 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2, - 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, - 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, - 0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, - 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, - 0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, - 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197, - 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, - 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, - 0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, - 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, - 0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, - 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927, - 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, - 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, - 0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, - 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, - 0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, - 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0, - 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, - 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, -}; - -__device__ __constant__ u32 te4[256] = -{ - 0x63636363, 0x7c7c7c7c, 0x77777777, 0x7b7b7b7b, - 0xf2f2f2f2, 0x6b6b6b6b, 0x6f6f6f6f, 0xc5c5c5c5, - 0x30303030, 0x01010101, 0x67676767, 0x2b2b2b2b, - 0xfefefefe, 0xd7d7d7d7, 0xabababab, 0x76767676, - 0xcacacaca, 0x82828282, 0xc9c9c9c9, 0x7d7d7d7d, - 0xfafafafa, 0x59595959, 0x47474747, 0xf0f0f0f0, - 0xadadadad, 0xd4d4d4d4, 0xa2a2a2a2, 0xafafafaf, - 0x9c9c9c9c, 0xa4a4a4a4, 0x72727272, 0xc0c0c0c0, - 0xb7b7b7b7, 0xfdfdfdfd, 0x93939393, 0x26262626, - 0x36363636, 0x3f3f3f3f, 0xf7f7f7f7, 0xcccccccc, - 0x34343434, 0xa5a5a5a5, 0xe5e5e5e5, 0xf1f1f1f1, - 0x71717171, 0xd8d8d8d8, 0x31313131, 0x15151515, - 0x04040404, 0xc7c7c7c7, 0x23232323, 0xc3c3c3c3, - 0x18181818, 0x96969696, 0x05050505, 0x9a9a9a9a, - 
0x07070707, 0x12121212, 0x80808080, 0xe2e2e2e2, - 0xebebebeb, 0x27272727, 0xb2b2b2b2, 0x75757575, - 0x09090909, 0x83838383, 0x2c2c2c2c, 0x1a1a1a1a, - 0x1b1b1b1b, 0x6e6e6e6e, 0x5a5a5a5a, 0xa0a0a0a0, - 0x52525252, 0x3b3b3b3b, 0xd6d6d6d6, 0xb3b3b3b3, - 0x29292929, 0xe3e3e3e3, 0x2f2f2f2f, 0x84848484, - 0x53535353, 0xd1d1d1d1, 0x00000000, 0xedededed, - 0x20202020, 0xfcfcfcfc, 0xb1b1b1b1, 0x5b5b5b5b, - 0x6a6a6a6a, 0xcbcbcbcb, 0xbebebebe, 0x39393939, - 0x4a4a4a4a, 0x4c4c4c4c, 0x58585858, 0xcfcfcfcf, - 0xd0d0d0d0, 0xefefefef, 0xaaaaaaaa, 0xfbfbfbfb, - 0x43434343, 0x4d4d4d4d, 0x33333333, 0x85858585, - 0x45454545, 0xf9f9f9f9, 0x02020202, 0x7f7f7f7f, - 0x50505050, 0x3c3c3c3c, 0x9f9f9f9f, 0xa8a8a8a8, - 0x51515151, 0xa3a3a3a3, 0x40404040, 0x8f8f8f8f, - 0x92929292, 0x9d9d9d9d, 0x38383838, 0xf5f5f5f5, - 0xbcbcbcbc, 0xb6b6b6b6, 0xdadadada, 0x21212121, - 0x10101010, 0xffffffff, 0xf3f3f3f3, 0xd2d2d2d2, - 0xcdcdcdcd, 0x0c0c0c0c, 0x13131313, 0xecececec, - 0x5f5f5f5f, 0x97979797, 0x44444444, 0x17171717, - 0xc4c4c4c4, 0xa7a7a7a7, 0x7e7e7e7e, 0x3d3d3d3d, - 0x64646464, 0x5d5d5d5d, 0x19191919, 0x73737373, - 0x60606060, 0x81818181, 0x4f4f4f4f, 0xdcdcdcdc, - 0x22222222, 0x2a2a2a2a, 0x90909090, 0x88888888, - 0x46464646, 0xeeeeeeee, 0xb8b8b8b8, 0x14141414, - 0xdededede, 0x5e5e5e5e, 0x0b0b0b0b, 0xdbdbdbdb, - 0xe0e0e0e0, 0x32323232, 0x3a3a3a3a, 0x0a0a0a0a, - 0x49494949, 0x06060606, 0x24242424, 0x5c5c5c5c, - 0xc2c2c2c2, 0xd3d3d3d3, 0xacacacac, 0x62626262, - 0x91919191, 0x95959595, 0xe4e4e4e4, 0x79797979, - 0xe7e7e7e7, 0xc8c8c8c8, 0x37373737, 0x6d6d6d6d, - 0x8d8d8d8d, 0xd5d5d5d5, 0x4e4e4e4e, 0xa9a9a9a9, - 0x6c6c6c6c, 0x56565656, 0xf4f4f4f4, 0xeaeaeaea, - 0x65656565, 0x7a7a7a7a, 0xaeaeaeae, 0x08080808, - 0xbabababa, 0x78787878, 0x25252525, 0x2e2e2e2e, - 0x1c1c1c1c, 0xa6a6a6a6, 0xb4b4b4b4, 0xc6c6c6c6, - 0xe8e8e8e8, 0xdddddddd, 0x74747474, 0x1f1f1f1f, - 0x4b4b4b4b, 0xbdbdbdbd, 0x8b8b8b8b, 0x8a8a8a8a, - 0x70707070, 0x3e3e3e3e, 0xb5b5b5b5, 0x66666666, - 0x48484848, 0x03030303, 0xf6f6f6f6, 0x0e0e0e0e, - 
0x61616161, 0x35353535, 0x57575757, 0xb9b9b9b9, - 0x86868686, 0xc1c1c1c1, 0x1d1d1d1d, 0x9e9e9e9e, - 0xe1e1e1e1, 0xf8f8f8f8, 0x98989898, 0x11111111, - 0x69696969, 0xd9d9d9d9, 0x8e8e8e8e, 0x94949494, - 0x9b9b9b9b, 0x1e1e1e1e, 0x87878787, 0xe9e9e9e9, - 0xcececece, 0x55555555, 0x28282828, 0xdfdfdfdf, - 0x8c8c8c8c, 0xa1a1a1a1, 0x89898989, 0x0d0d0d0d, - 0xbfbfbfbf, 0xe6e6e6e6, 0x42424242, 0x68686868, - 0x41414141, 0x99999999, 0x2d2d2d2d, 0x0f0f0f0f, - 0xb0b0b0b0, 0x54545454, 0xbbbbbbbb, 0x16161616, -}; - -__device__ __constant__ u32 td0[256] = -{ - 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, - 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, - 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, - 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, - 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, - 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, - 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, - 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, - 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, - 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, - 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, - 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, - 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, - 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, - 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, - 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, - 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, - 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, - 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, - 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, - 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, - 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, - 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, - 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, - 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, - 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, - 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, - 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, - 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, - 
0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, - 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, - 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, - 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, - 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, - 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, - 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, - 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, - 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, - 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, - 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, - 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, - 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, - 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, - 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, - 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, - 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, - 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, - 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, - 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, - 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, - 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, - 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, - 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, - 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, - 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, - 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, - 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, - 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, - 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, - 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, - 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, - 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, - 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, - 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742, -}; - -__device__ __constant__ u32 td1[256] = -{ - 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, - 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, - 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, - 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, - 
0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, - 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, - 0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, - 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8, - 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, - 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, - 0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, - 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, - 0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, - 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab, - 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, - 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, - 0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, - 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, - 0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, - 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10, - 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, - 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, - 0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, - 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, - 0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, - 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72, - 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, - 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, - 0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, - 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, - 0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, - 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9, - 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, - 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, - 0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, - 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, - 0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, - 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3, - 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, - 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, - 0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, - 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, - 0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, - 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af, - 
0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, - 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, - 0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, - 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, - 0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, - 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266, - 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, - 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, - 0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, - 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, - 0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, - 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647, - 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, - 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, - 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, - 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, - 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, - 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, - 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, - 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, -}; - -__device__ __constant__ u32 td2[256] = -{ - 0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, - 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3, - 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, - 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, - 0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, - 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, - 0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, - 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9, - 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, - 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, - 0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, - 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, - 0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, - 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655, - 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, - 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, - 0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, - 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, - 0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, - 
0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e, - 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, - 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, - 0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, - 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, - 0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, - 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a, - 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, - 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, - 0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, - 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, - 0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, - 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e, - 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, - 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, - 0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, - 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, - 0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, - 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1, - 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, - 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, - 0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, - 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, - 0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, - 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3, - 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, - 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, - 0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, - 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, - 0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, - 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2, - 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, - 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, - 0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, - 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, - 0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, - 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6, - 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, - 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, - 0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, - 
0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, - 0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, - 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d, - 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, - 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8, -}; - -__device__ __constant__ u32 td3[256] = -{ - 0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, - 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b, - 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, - 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, - 0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, - 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, - 0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, - 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e, - 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, - 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, - 0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, - 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, - 0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, - 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66, - 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, - 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, - 0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, - 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, - 0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, - 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd, - 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, - 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, - 0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, - 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, - 0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, - 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c, - 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, - 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, - 0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, - 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, - 0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, - 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814, - 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, - 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, - 
0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, - 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, - 0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, - 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077, - 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, - 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, - 0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, - 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, - 0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, - 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582, - 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, - 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, - 0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, - 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, - 0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, - 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035, - 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, - 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, - 0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, - 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, - 0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, - 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d, - 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, - 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, - 0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, - 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, - 0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, - 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff, - 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, - 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0, -}; - -__device__ __constant__ u32 td4[256] = -{ - 0x52525252, 0x09090909, 0x6a6a6a6a, 0xd5d5d5d5, - 0x30303030, 0x36363636, 0xa5a5a5a5, 0x38383838, - 0xbfbfbfbf, 0x40404040, 0xa3a3a3a3, 0x9e9e9e9e, - 0x81818181, 0xf3f3f3f3, 0xd7d7d7d7, 0xfbfbfbfb, - 0x7c7c7c7c, 0xe3e3e3e3, 0x39393939, 0x82828282, - 0x9b9b9b9b, 0x2f2f2f2f, 0xffffffff, 0x87878787, - 0x34343434, 0x8e8e8e8e, 0x43434343, 0x44444444, - 0xc4c4c4c4, 0xdededede, 0xe9e9e9e9, 0xcbcbcbcb, - 0x54545454, 0x7b7b7b7b, 0x94949494, 0x32323232, - 
0xa6a6a6a6, 0xc2c2c2c2, 0x23232323, 0x3d3d3d3d, - 0xeeeeeeee, 0x4c4c4c4c, 0x95959595, 0x0b0b0b0b, - 0x42424242, 0xfafafafa, 0xc3c3c3c3, 0x4e4e4e4e, - 0x08080808, 0x2e2e2e2e, 0xa1a1a1a1, 0x66666666, - 0x28282828, 0xd9d9d9d9, 0x24242424, 0xb2b2b2b2, - 0x76767676, 0x5b5b5b5b, 0xa2a2a2a2, 0x49494949, - 0x6d6d6d6d, 0x8b8b8b8b, 0xd1d1d1d1, 0x25252525, - 0x72727272, 0xf8f8f8f8, 0xf6f6f6f6, 0x64646464, - 0x86868686, 0x68686868, 0x98989898, 0x16161616, - 0xd4d4d4d4, 0xa4a4a4a4, 0x5c5c5c5c, 0xcccccccc, - 0x5d5d5d5d, 0x65656565, 0xb6b6b6b6, 0x92929292, - 0x6c6c6c6c, 0x70707070, 0x48484848, 0x50505050, - 0xfdfdfdfd, 0xedededed, 0xb9b9b9b9, 0xdadadada, - 0x5e5e5e5e, 0x15151515, 0x46464646, 0x57575757, - 0xa7a7a7a7, 0x8d8d8d8d, 0x9d9d9d9d, 0x84848484, - 0x90909090, 0xd8d8d8d8, 0xabababab, 0x00000000, - 0x8c8c8c8c, 0xbcbcbcbc, 0xd3d3d3d3, 0x0a0a0a0a, - 0xf7f7f7f7, 0xe4e4e4e4, 0x58585858, 0x05050505, - 0xb8b8b8b8, 0xb3b3b3b3, 0x45454545, 0x06060606, - 0xd0d0d0d0, 0x2c2c2c2c, 0x1e1e1e1e, 0x8f8f8f8f, - 0xcacacaca, 0x3f3f3f3f, 0x0f0f0f0f, 0x02020202, - 0xc1c1c1c1, 0xafafafaf, 0xbdbdbdbd, 0x03030303, - 0x01010101, 0x13131313, 0x8a8a8a8a, 0x6b6b6b6b, - 0x3a3a3a3a, 0x91919191, 0x11111111, 0x41414141, - 0x4f4f4f4f, 0x67676767, 0xdcdcdcdc, 0xeaeaeaea, - 0x97979797, 0xf2f2f2f2, 0xcfcfcfcf, 0xcececece, - 0xf0f0f0f0, 0xb4b4b4b4, 0xe6e6e6e6, 0x73737373, - 0x96969696, 0xacacacac, 0x74747474, 0x22222222, - 0xe7e7e7e7, 0xadadadad, 0x35353535, 0x85858585, - 0xe2e2e2e2, 0xf9f9f9f9, 0x37373737, 0xe8e8e8e8, - 0x1c1c1c1c, 0x75757575, 0xdfdfdfdf, 0x6e6e6e6e, - 0x47474747, 0xf1f1f1f1, 0x1a1a1a1a, 0x71717171, - 0x1d1d1d1d, 0x29292929, 0xc5c5c5c5, 0x89898989, - 0x6f6f6f6f, 0xb7b7b7b7, 0x62626262, 0x0e0e0e0e, - 0xaaaaaaaa, 0x18181818, 0xbebebebe, 0x1b1b1b1b, - 0xfcfcfcfc, 0x56565656, 0x3e3e3e3e, 0x4b4b4b4b, - 0xc6c6c6c6, 0xd2d2d2d2, 0x79797979, 0x20202020, - 0x9a9a9a9a, 0xdbdbdbdb, 0xc0c0c0c0, 0xfefefefe, - 0x78787878, 0xcdcdcdcd, 0x5a5a5a5a, 0xf4f4f4f4, - 0x1f1f1f1f, 0xdddddddd, 0xa8a8a8a8, 0x33333333, - 
0x88888888, 0x07070707, 0xc7c7c7c7, 0x31313131, - 0xb1b1b1b1, 0x12121212, 0x10101010, 0x59595959, - 0x27272727, 0x80808080, 0xecececec, 0x5f5f5f5f, - 0x60606060, 0x51515151, 0x7f7f7f7f, 0xa9a9a9a9, - 0x19191919, 0xb5b5b5b5, 0x4a4a4a4a, 0x0d0d0d0d, - 0x2d2d2d2d, 0xe5e5e5e5, 0x7a7a7a7a, 0x9f9f9f9f, - 0x93939393, 0xc9c9c9c9, 0x9c9c9c9c, 0xefefefef, - 0xa0a0a0a0, 0xe0e0e0e0, 0x3b3b3b3b, 0x4d4d4d4d, - 0xaeaeaeae, 0x2a2a2a2a, 0xf5f5f5f5, 0xb0b0b0b0, - 0xc8c8c8c8, 0xebebebeb, 0xbbbbbbbb, 0x3c3c3c3c, - 0x83838383, 0x53535353, 0x99999999, 0x61616161, - 0x17171717, 0x2b2b2b2b, 0x04040404, 0x7e7e7e7e, - 0xbabababa, 0x77777777, 0xd6d6d6d6, 0x26262626, - 0xe1e1e1e1, 0x69696969, 0x14141414, 0x63636363, - 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, -}; - -__device__ __constant__ u32 rcon[] = -{ - 0x01000000, 0x02000000, 0x04000000, 0x08000000, - 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1b000000, 0x36000000, -}; - -__device__ static void AES256_ExpandKey (u32 *userkey, u32 *rek, u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - rek[0] = userkey[0]; - rek[1] = userkey[1]; - rek[2] = userkey[2]; - rek[3] = userkey[3]; - rek[4] = userkey[4]; - rek[5] = userkey[5]; - rek[6] = userkey[6]; - rek[7] = userkey[7]; - - int i; - int j; - - i = 0; - j = 0; - - u32 run = 1; - - while (run) - { - u32 temp = rek[j + 7]; - - rek[j + 8] = rek[j + 0] - ^ (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - rek[j + 9] = rek[j + 1] ^ rek[j + 8]; - rek[j + 10] = rek[j + 2] ^ rek[j + 9]; - rek[j + 11] = rek[j + 3] ^ rek[j + 10]; - - if (++i == 7) - { - run = 0; - continue; - } - - temp = rek[j + 11]; - - rek[j + 12] = rek[j + 4] - ^ (s_te2[(temp >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 0) & 0xff] & 0x000000ff); - - 
rek[j + 13] = rek[j + 5] ^ rek[j + 12]; - rek[j + 14] = rek[j + 6] ^ rek[j + 13]; - rek[j + 15] = rek[j + 7] ^ rek[j + 14]; - - j += 8; - } -} - -__device__ static void AES256_InvertKey (u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256], u32 s_te0[256], u32 s_te1[256], u32 s_te2[256], u32 s_te3[256], u32 s_te4[256]) -{ - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) - { - u32 temp; - - temp = rdk[i + 0]; rdk[i + 0] = rdk[j + 0]; rdk[j + 0] = temp; - temp = rdk[i + 1]; rdk[i + 1] = rdk[j + 1]; rdk[j + 1] = temp; - temp = rdk[i + 2]; rdk[i + 2] = rdk[j + 2]; rdk[j + 2] = temp; - temp = rdk[i + 3]; rdk[i + 3] = rdk[j + 3]; rdk[j + 3] = temp; - } - - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) - { - rdk[j + 0] = - s_td0[s_te1[(rdk[j + 0] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 0] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 0] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 0] >> 0) & 0xff] & 0xff]; - - rdk[j + 1] = - s_td0[s_te1[(rdk[j + 1] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 1] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 1] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 1] >> 0) & 0xff] & 0xff]; - - rdk[j + 2] = - s_td0[s_te1[(rdk[j + 2] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 2] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 2] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 2] >> 0) & 0xff] & 0xff]; - - rdk[j + 3] = - s_td0[s_te1[(rdk[j + 3] >> 24) & 0xff] & 0xff] ^ - s_td1[s_te1[(rdk[j + 3] >> 16) & 0xff] & 0xff] ^ - s_td2[s_te1[(rdk[j + 3] >> 8) & 0xff] & 0xff] ^ - s_td3[s_te1[(rdk[j + 3] >> 0) & 0xff] & 0xff]; - } -} - -__device__ static void AES256_decrypt (const u32 *in, u32 *out, const u32 *rdk, u32 s_td0[256], u32 s_td1[256], u32 s_td2[256], u32 s_td3[256], u32 s_td4[256]) -{ - u32 s0 = in[0] ^ rdk[0]; - u32 s1 = in[1] ^ rdk[1]; - u32 s2 = in[2] ^ rdk[2]; - u32 s3 = in[3] ^ rdk[3]; - - u32 t0; - u32 t1; - u32 t2; - u32 t3; - - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ 
s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[ 4]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[ 5]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[ 6]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[ 7]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[ 8]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[ 9]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[10]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[11]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[12]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[13]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[14]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[15]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[16]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[17]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[18]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[19]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[20]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[21]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[22]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 
16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[23]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[24]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[25]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[26]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[27]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[28]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[29]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[30]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[31]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[32]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[33]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[34]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[35]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[36]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[37]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[38]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[39]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[40]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[41]; - s2 = s_td0[t2 >> 24] 
^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[42]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[43]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[44]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[45]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[46]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[47]; - s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ rdk[48]; - s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ rdk[49]; - s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ rdk[50]; - s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ rdk[51]; - t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ rdk[52]; - t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ rdk[53]; - t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ rdk[54]; - t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ rdk[55]; - - out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff) - ^ rdk[56]; - - out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff) - ^ rdk[57]; - - out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00) - ^ 
(s_td4[(t3 >> 0) & 0xff] & 0x000000ff) - ^ rdk[58]; - - out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000) - ^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00) - ^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff) - ^ rdk[59]; -} - -__device__ static void sha1_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[5]) -{ - u32x A = digest[0]; - u32x B = digest[1]; - u32x C = digest[2]; - u32x D = digest[3]; - u32x E = digest[4]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - #undef K - #define K SHA1C00 - - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w0_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w1_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w2_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w3_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w4_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, w5_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, w6_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, w7_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, w8_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, w9_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wa_t); - SHA1_STEP (SHA1_F0o, E, A, B, C, D, wb_t); - SHA1_STEP (SHA1_F0o, D, E, A, B, C, wc_t); - SHA1_STEP (SHA1_F0o, C, D, E, A, B, wd_t); - SHA1_STEP (SHA1_F0o, B, C, D, E, A, we_t); - SHA1_STEP (SHA1_F0o, A, B, C, D, E, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F0o, E, A, B, C, D, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F0o, D, E, A, B, C, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F0o, C, D, E, A, B, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F0o, B, C, D, E, A, w3_t); - - #undef K - #define K SHA1C01 - - 
w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w7_t); - - #undef K - #define K SHA1C02 - - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w8_t); - w9_t = 
rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F2o, A, B, C, D, E, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F2o, E, A, B, C, D, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F2o, D, E, A, B, C, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F2o, C, D, E, A, B, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F2o, B, C, D, E, A, wb_t); - - #undef K - #define K SHA1C03 - - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wd_t); - 
we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, wf_t); - w0_t = rotl32 ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w0_t); - w1_t = rotl32 ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w1_t); - w2_t = rotl32 ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w2_t); - w3_t = rotl32 ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w3_t); - w4_t = rotl32 ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w4_t); - w5_t = rotl32 ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, w5_t); - w6_t = rotl32 ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, w6_t); - w7_t = rotl32 ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, w7_t); - w8_t = rotl32 ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, w8_t); - w9_t = rotl32 ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, w9_t); - wa_t = rotl32 ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wa_t); - wb_t = rotl32 ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP (SHA1_F1, A, B, C, D, E, wb_t); - wc_t = rotl32 ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP (SHA1_F1, E, A, B, C, D, wc_t); - wd_t = rotl32 ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP (SHA1_F1, D, E, A, B, C, wd_t); - we_t = rotl32 ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP (SHA1_F1, C, D, E, A, B, we_t); - wf_t = rotl32 ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP (SHA1_F1, B, C, D, E, A, wf_t); - - digest[0] += A; - digest[1] += B; - digest[2] += C; - digest[3] += D; - digest[4] += E; -} - -__device__ static void hmac_sha1_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 
0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] = w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA1M_A; - ipad[1] = SHA1M_B; - ipad[2] = SHA1M_C; - ipad[3] = SHA1M_D; - ipad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA1M_A; - opad[1] = SHA1M_B; - opad[2] = SHA1M_C; - opad[3] = SHA1M_D; - opad[4] = SHA1M_E; - - sha1_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha1_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[5], u32x opad[5], u32x digest[5]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - - sha1_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - - sha1_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12700_init (const pw_t *pws, const gpu_rule_t 
*rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x w0[4]; - - w0[0] = pws[gid].i[ 0]; - w0[1] = pws[gid].i[ 1]; - w0[2] = pws[gid].i[ 2]; - w0[3] = pws[gid].i[ 3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[ 4]; - w1[1] = pws[gid].i[ 5]; - w1[2] = pws[gid].i[ 6]; - w1[3] = pws[gid].i[ 7]; - - u32x w2[4]; - - w2[0] = pws[gid].i[ 8]; - w2[1] = pws[gid].i[ 9]; - w2[2] = pws[gid].i[10]; - w2[3] = pws[gid].i[11]; - - u32x w3[4]; - - w3[0] = pws[gid].i[12]; - w3[1] = pws[gid].i[13]; - w3[2] = pws[gid].i[14]; - w3[3] = pws[gid].i[15]; - - /** - * salt - */ - - u32 salt_len = 16; - - u32 salt_buf[4]; - - salt_buf[0] = salt_bufs[salt_pos].salt_buf[0]; - salt_buf[1] = salt_bufs[salt_pos].salt_buf[1]; - salt_buf[2] = salt_bufs[salt_pos].salt_buf[2]; - salt_buf[3] = salt_bufs[salt_pos].salt_buf[3]; - - /** - * pads - */ - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround 
(w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32x ipad[5]; - u32x opad[5]; - - hmac_sha1_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - - // first 160 bits - - { - w0[0] = salt_buf[0]; - w0[1] = salt_buf[1]; - w0[2] = salt_buf[2]; - w0[3] = salt_buf[3]; - w1[0] = 0x00000001; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst1[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst1); - - tmps[gid].dgst1[0] = dgst1[0]; - tmps[gid].dgst1[1] = dgst1[1]; - tmps[gid].dgst1[2] = dgst1[2]; - tmps[gid].dgst1[3] = dgst1[3]; - tmps[gid].dgst1[4] = dgst1[4]; - - tmps[gid].out1[0] = dgst1[0]; - tmps[gid].out1[1] = dgst1[1]; - tmps[gid].out1[2] = dgst1[2]; - tmps[gid].out1[3] = dgst1[3]; - tmps[gid].out1[4] = dgst1[4]; - } - - // second 160 bits - - { - w0[0] = salt_buf[0]; - w0[1] = salt_buf[1]; - w0[2] = salt_buf[2]; - w0[3] = salt_buf[3]; - w1[0] = 0x00000002; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + salt_len + 4) * 8; - - u32x dgst2[5]; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst2); - - tmps[gid].dgst2[0] = dgst2[0]; - tmps[gid].dgst2[1] = dgst2[1]; - tmps[gid].dgst2[2] = dgst2[2]; - tmps[gid].dgst2[3] = dgst2[3]; - tmps[gid].dgst2[4] = dgst2[4]; - - tmps[gid].out2[0] = dgst2[0]; - tmps[gid].out2[1] = dgst2[1]; - tmps[gid].out2[2] = dgst2[2]; - tmps[gid].out2[3] = dgst2[3]; - 
tmps[gid].out2[4] = dgst2[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12700_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32x ipad[5]; - u32x opad[5]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - - // first 160 bits - - { - u32x dgst1[5]; - u32x out1[5]; - - dgst1[0] = tmps[gid].dgst1[0]; - dgst1[1] = tmps[gid].dgst1[1]; - dgst1[2] = tmps[gid].dgst1[2]; - dgst1[3] = tmps[gid].dgst1[3]; - dgst1[4] = tmps[gid].dgst1[4]; - - out1[0] = tmps[gid].out1[0]; - out1[1] = tmps[gid].out1[1]; - out1[2] = tmps[gid].out1[2]; - out1[3] = tmps[gid].out1[3]; - out1[4] = tmps[gid].out1[4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst1[0]; - w0[1] = dgst1[1]; - w0[2] = dgst1[2]; - w0[3] = dgst1[3]; - w1[0] = dgst1[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - 
w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst1); - - out1[0] ^= dgst1[0]; - out1[1] ^= dgst1[1]; - out1[2] ^= dgst1[2]; - out1[3] ^= dgst1[3]; - out1[4] ^= dgst1[4]; - } - - tmps[gid].dgst1[0] = dgst1[0]; - tmps[gid].dgst1[1] = dgst1[1]; - tmps[gid].dgst1[2] = dgst1[2]; - tmps[gid].dgst1[3] = dgst1[3]; - tmps[gid].dgst1[4] = dgst1[4]; - - tmps[gid].out1[0] = out1[0]; - tmps[gid].out1[1] = out1[1]; - tmps[gid].out1[2] = out1[2]; - tmps[gid].out1[3] = out1[3]; - tmps[gid].out1[4] = out1[4]; - } - - // second 160 bits - - { - u32x dgst2[5]; - u32x out2[5]; - - dgst2[0] = tmps[gid].dgst2[0]; - dgst2[1] = tmps[gid].dgst2[1]; - dgst2[2] = tmps[gid].dgst2[2]; - dgst2[3] = tmps[gid].dgst2[3]; - dgst2[4] = tmps[gid].dgst2[4]; - - out2[0] = tmps[gid].out2[0]; - out2[1] = tmps[gid].out2[1]; - out2[2] = tmps[gid].out2[2]; - out2[3] = tmps[gid].out2[3]; - out2[4] = tmps[gid].out2[4]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst2[0]; - w0[1] = dgst2[1]; - w0[2] = dgst2[2]; - w0[3] = dgst2[3]; - w1[0] = dgst2[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run (w0, w1, w2, w3, ipad, opad, dgst2); - - out2[0] ^= dgst2[0]; - out2[1] ^= dgst2[1]; - out2[2] ^= dgst2[2]; - out2[3] ^= dgst2[3]; - out2[4] ^= dgst2[4]; - } - - tmps[gid].dgst2[0] = dgst2[0]; - tmps[gid].dgst2[1] = dgst2[1]; - tmps[gid].dgst2[2] = dgst2[2]; - tmps[gid].dgst2[3] = dgst2[3]; - tmps[gid].dgst2[4] = dgst2[4]; - - tmps[gid].out2[0] = out2[0]; - tmps[gid].out2[1] = out2[1]; - tmps[gid].out2[2] = out2[2]; - tmps[gid].out2[3] = out2[3]; - tmps[gid].out2[4] = out2[4]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12700_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, 
mywallet_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const void *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * aes shared - */ - - __shared__ u32 s_td0[256]; - __shared__ u32 s_td1[256]; - __shared__ u32 s_td2[256]; - __shared__ u32 s_td3[256]; - __shared__ u32 s_td4[256]; - - __shared__ u32 s_te0[256]; - __shared__ u32 s_te1[256]; - __shared__ u32 s_te2[256]; - __shared__ u32 s_te3[256]; - __shared__ u32 s_te4[256]; - - s_td0[lid] = td0[lid]; - s_td1[lid] = td1[lid]; - s_td2[lid] = td2[lid]; - s_td3[lid] = td3[lid]; - s_td4[lid] = td4[lid]; - - s_te0[lid] = te0[lid]; - s_te1[lid] = te1[lid]; - s_te2[lid] = te2[lid]; - s_te3[lid] = te3[lid]; - s_te4[lid] = te4[lid]; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * AES part - */ - - const u32 iv[4] = - { - salt_bufs[salt_pos].salt_buf[0], - salt_bufs[salt_pos].salt_buf[1], - salt_bufs[salt_pos].salt_buf[2], - salt_bufs[salt_pos].salt_buf[3] - }; - - const u32 data[4] = - { - salt_bufs[salt_pos].salt_buf[4], - salt_bufs[salt_pos].salt_buf[5], - salt_bufs[salt_pos].salt_buf[6], - salt_bufs[salt_pos].salt_buf[7] - }; - - u32x ukey[8]; - - ukey[0] = tmps[gid].out1[0]; - ukey[1] = tmps[gid].out1[1]; - ukey[2] = tmps[gid].out1[2]; - ukey[3] = tmps[gid].out1[3]; - ukey[4] = tmps[gid].out1[4]; - ukey[5] = tmps[gid].out2[0]; - ukey[6] = 
tmps[gid].out2[1]; - ukey[7] = tmps[gid].out2[2]; - - #define KEYLEN 60 - - u32 rk[KEYLEN]; - - AES256_ExpandKey (ukey, rk, s_te0, s_te1, s_te2, s_te3, s_te4); - - AES256_InvertKey (rk, s_td0, s_td1, s_td2, s_td3, s_td4, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 out[4]; - - AES256_decrypt (data, out, rk, s_td0, s_td1, s_td2, s_td3, s_td4); - - out[0] ^= iv[0]; - out[1] ^= iv[1]; - out[2] ^= iv[2]; - out[3] ^= iv[3]; - - out[0] = swap_workaround (out[0]); - out[1] = swap_workaround (out[1]); - out[2] = swap_workaround (out[2]); - out[3] = swap_workaround (out[3]); - - if ((out[0] & 0xff) != '{') return; - - char *pt = (char *) out; - - for (int i = 1; i < 16 - 6; i++) - { - if (pt[i + 0] != '"') continue; - if (pt[i + 1] != 'g') continue; - if (pt[i + 2] != 'u') continue; - if (pt[i + 3] != 'i') continue; - if (pt[i + 4] != 'd') continue; - if (pt[i + 5] != '"') continue; - - const u32x r0 = data[0]; - const u32x r1 = data[1]; - const u32x r2 = data[2]; - const u32x r3 = data[3]; - - #define il_pos 0 - - #include VECT_COMPARE_M - } -} diff --git a/nv/m12800.cu b/nv/m12800.cu deleted file mode 100644 index db65b4f..0000000 --- a/nv/m12800.cu +++ /dev/null @@ -1,631 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define _MS_DRSR_ - -#include "include/constants.h" -#include "include/kernel_vendor.h" - -#ifdef VLIW1 -#define VECT_SIZE1 -#endif - -#ifdef VLIW2 -#define VECT_SIZE1 -#endif - -#define DGST_R0 0 -#define DGST_R1 1 -#define DGST_R2 2 -#define DGST_R3 3 - -#include "include/kernel_functions.c" - -#include "types_nv.c" -#include "common_nv.c" - -#ifdef VECT_SIZE1 -#define VECT_COMPARE_M "check_multi_vect1_comp4.c" -#endif - -#ifdef VECT_SIZE1 -#define uint_to_hex_lower8(i) l_bin2asc[(i)] -#endif - -__device__ __constant__ char c_bin2asc[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; - -__device__ __shared__ short l_bin2asc[256]; - -__device__ static void md4_transform (const u32x w0[4], 
const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[4]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - - MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03); - MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00); - MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01); - MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02); - MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03); - - MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13); - MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10); - MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11); - MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12); - MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13); - - MD4_STEP (MD4_H , a, b, c, d, w0[0], 
MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23); - MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20); - MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21); - MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22); - MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; -} - -__device__ static void sha256_transform (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[8]) -{ - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C00); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C01); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C02); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C03); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C04); - 
SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C05); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C06); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C07); - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C08); - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C09); - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C0a); - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C0b); - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C0c); - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C0d); - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C0e); - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C0f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C10); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C11); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C12); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C13); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C14); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C15); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C16); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C17); - w8_t = 
SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C18); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C19); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C1a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C1b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C1c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C1d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C1e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C1f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C20); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C21); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C22); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C23); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C24); - w5_t = SHA256_S1(w3_t) + we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C25); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, 
SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C26); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C27); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C28); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C29); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C2a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C2b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C2c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C2d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C2e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C2f); - w0_t = SHA256_S1(we_t) + w9_t + SHA256_S0(w1_t) + w0_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, SHA256C30); - w1_t = SHA256_S1(wf_t) + wa_t + SHA256_S0(w2_t) + w1_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, SHA256C31); - w2_t = SHA256_S1(w0_t) + wb_t + SHA256_S0(w3_t) + w2_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, SHA256C32); - w3_t = SHA256_S1(w1_t) + wc_t + SHA256_S0(w4_t) + w3_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, SHA256C33); - w4_t = SHA256_S1(w2_t) + wd_t + SHA256_S0(w5_t) + w4_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, SHA256C34); - w5_t = SHA256_S1(w3_t) + 
we_t + SHA256_S0(w6_t) + w5_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, SHA256C35); - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, SHA256C36); - w7_t = SHA256_S1(w5_t) + w0_t + SHA256_S0(w8_t) + w7_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, SHA256C37); - w8_t = SHA256_S1(w6_t) + w1_t + SHA256_S0(w9_t) + w8_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, SHA256C38); - w9_t = SHA256_S1(w7_t) + w2_t + SHA256_S0(wa_t) + w9_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, SHA256C39); - wa_t = SHA256_S1(w8_t) + w3_t + SHA256_S0(wb_t) + wa_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, SHA256C3a); - wb_t = SHA256_S1(w9_t) + w4_t + SHA256_S0(wc_t) + wb_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, SHA256C3b); - wc_t = SHA256_S1(wa_t) + w5_t + SHA256_S0(wd_t) + wc_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, SHA256C3c); - wd_t = SHA256_S1(wb_t) + w6_t + SHA256_S0(we_t) + wd_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, SHA256C3d); - we_t = SHA256_S1(wc_t) + w7_t + SHA256_S0(wf_t) + we_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, SHA256C3e); - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, SHA256C3f); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -__device__ static void hmac_sha256_pad (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8]) -{ - w0[0] = w0[0] ^ 0x36363636; - w0[1] = w0[1] ^ 0x36363636; - w0[2] = w0[2] ^ 0x36363636; - w0[3] = w0[3] ^ 0x36363636; - w1[0] = w1[0] ^ 0x36363636; - w1[1] = w1[1] ^ 0x36363636; - w1[2] = w1[2] ^ 0x36363636; - w1[3] = w1[3] ^ 0x36363636; - w2[0] 
= w2[0] ^ 0x36363636; - w2[1] = w2[1] ^ 0x36363636; - w2[2] = w2[2] ^ 0x36363636; - w2[3] = w2[3] ^ 0x36363636; - w3[0] = w3[0] ^ 0x36363636; - w3[1] = w3[1] ^ 0x36363636; - w3[2] = w3[2] ^ 0x36363636; - w3[3] = w3[3] ^ 0x36363636; - - ipad[0] = SHA256M_A; - ipad[1] = SHA256M_B; - ipad[2] = SHA256M_C; - ipad[3] = SHA256M_D; - ipad[4] = SHA256M_E; - ipad[5] = SHA256M_F; - ipad[6] = SHA256M_G; - ipad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, ipad); - - w0[0] = w0[0] ^ 0x6a6a6a6a; - w0[1] = w0[1] ^ 0x6a6a6a6a; - w0[2] = w0[2] ^ 0x6a6a6a6a; - w0[3] = w0[3] ^ 0x6a6a6a6a; - w1[0] = w1[0] ^ 0x6a6a6a6a; - w1[1] = w1[1] ^ 0x6a6a6a6a; - w1[2] = w1[2] ^ 0x6a6a6a6a; - w1[3] = w1[3] ^ 0x6a6a6a6a; - w2[0] = w2[0] ^ 0x6a6a6a6a; - w2[1] = w2[1] ^ 0x6a6a6a6a; - w2[2] = w2[2] ^ 0x6a6a6a6a; - w2[3] = w2[3] ^ 0x6a6a6a6a; - w3[0] = w3[0] ^ 0x6a6a6a6a; - w3[1] = w3[1] ^ 0x6a6a6a6a; - w3[2] = w3[2] ^ 0x6a6a6a6a; - w3[3] = w3[3] ^ 0x6a6a6a6a; - - opad[0] = SHA256M_A; - opad[1] = SHA256M_B; - opad[2] = SHA256M_C; - opad[3] = SHA256M_D; - opad[4] = SHA256M_E; - opad[5] = SHA256M_F; - opad[6] = SHA256M_G; - opad[7] = SHA256M_H; - - sha256_transform (w0, w1, w2, w3, opad); -} - -__device__ static void hmac_sha256_run (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[8], u32x opad[8], u32x digest[8]) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = 
opad[6]; - digest[7] = opad[7]; - - sha256_transform (w0, w1, w2, w3, digest); -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12800_init (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - const u32 lid = threadIdx.x; - - /** - * lookup ascii table - */ - - l_bin2asc[lid] = c_bin2asc[(lid >> 0) & 15] << 8 - | c_bin2asc[(lid >> 4) & 15] << 0; - - __syncthreads (); - - if (gid >= gid_max) return; - - /** - * base - */ - - u32x w0[4]; - - w0[0] = pws[gid].i[0]; - w0[1] = pws[gid].i[1]; - w0[2] = pws[gid].i[2]; - w0[3] = pws[gid].i[3]; - - u32x w1[4]; - - w1[0] = pws[gid].i[4]; - w1[1] = pws[gid].i[5]; - w1[2] = 0; - w1[3] = 0; - - u32x w2[4]; - - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - - u32x w3[4]; - - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - const u32 pw_len = pws[gid].pw_len; - - /** - * salt - */ - - u32 salt_len = salt_bufs[salt_pos].salt_len; - - u32 salt_buf0[4]; - - salt_buf0[0] = swap_workaround (salt_bufs[salt_pos].salt_buf[0]); - salt_buf0[1] = swap_workaround (salt_bufs[salt_pos].salt_buf[1]); - salt_buf0[2] = swap_workaround (salt_bufs[salt_pos].salt_buf[2]); - salt_buf0[3] = swap_workaround 
(salt_bufs[salt_pos].salt_buf[3]); - - u32 salt_buf1[4]; - - salt_buf1[0] = 0; - salt_buf1[1] = 0; - salt_buf1[2] = 0; - salt_buf1[3] = 0; - - u32 salt_buf2[4]; - - salt_buf2[0] = 0; - salt_buf2[1] = 0; - salt_buf2[2] = 0; - salt_buf2[3] = 0; - - u32 salt_buf3[4]; - - salt_buf3[0] = 0; - salt_buf3[1] = 0; - salt_buf3[2] = 0; - salt_buf3[3] = (64 + salt_len + 4) * 8; - - /** - * generate nthash - */ - - append_0x80_2 (w0, w1, pw_len); - - make_unicode (w1, w2, w3); - make_unicode (w0, w0, w1); - - w3[2] = pw_len * 2 * 8; - - u32x digest_md4[4]; - - digest_md4[0] = MD4M_A; - digest_md4[1] = MD4M_B; - digest_md4[2] = MD4M_C; - digest_md4[3] = MD4M_D; - - md4_transform (w0, w1, w2, w3, digest_md4); - - w0[0] = uint_to_hex_lower8 ((digest_md4[0] >> 0) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[0] >> 8) & 255) << 16; - w0[1] = uint_to_hex_lower8 ((digest_md4[0] >> 16) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[0] >> 24) & 255) << 16; - w0[2] = uint_to_hex_lower8 ((digest_md4[1] >> 0) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[1] >> 8) & 255) << 16; - w0[3] = uint_to_hex_lower8 ((digest_md4[1] >> 16) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[1] >> 24) & 255) << 16; - w1[0] = uint_to_hex_lower8 ((digest_md4[2] >> 0) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[2] >> 8) & 255) << 16; - w1[1] = uint_to_hex_lower8 ((digest_md4[2] >> 16) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[2] >> 24) & 255) << 16; - w1[2] = uint_to_hex_lower8 ((digest_md4[3] >> 0) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[3] >> 8) & 255) << 16; - w1[3] = uint_to_hex_lower8 ((digest_md4[3] >> 16) & 255) << 0 - | uint_to_hex_lower8 ((digest_md4[3] >> 24) & 255) << 16; - - make_unicode (w1, w2, w3); - make_unicode (w0, w0, w1); - - w0[0] = swap_workaround (w0[0]); - w0[1] = swap_workaround (w0[1]); - w0[2] = swap_workaround (w0[2]); - w0[3] = swap_workaround (w0[3]); - w1[0] = swap_workaround (w1[0]); - w1[1] = swap_workaround (w1[1]); - w1[2] = swap_workaround (w1[2]); - 
w1[3] = swap_workaround (w1[3]); - w2[0] = swap_workaround (w2[0]); - w2[1] = swap_workaround (w2[1]); - w2[2] = swap_workaround (w2[2]); - w2[3] = swap_workaround (w2[3]); - w3[0] = swap_workaround (w3[0]); - w3[1] = swap_workaround (w3[1]); - w3[2] = swap_workaround (w3[2]); - w3[3] = swap_workaround (w3[3]); - - u32 ipad[8]; - u32 opad[8]; - - hmac_sha256_pad (w0, w1, w2, w3, ipad, opad); - - tmps[gid].ipad[0] = ipad[0]; - tmps[gid].ipad[1] = ipad[1]; - tmps[gid].ipad[2] = ipad[2]; - tmps[gid].ipad[3] = ipad[3]; - tmps[gid].ipad[4] = ipad[4]; - tmps[gid].ipad[5] = ipad[5]; - tmps[gid].ipad[6] = ipad[6]; - tmps[gid].ipad[7] = ipad[7]; - - tmps[gid].opad[0] = opad[0]; - tmps[gid].opad[1] = opad[1]; - tmps[gid].opad[2] = opad[2]; - tmps[gid].opad[3] = opad[3]; - tmps[gid].opad[4] = opad[4]; - tmps[gid].opad[5] = opad[5]; - tmps[gid].opad[6] = opad[6]; - tmps[gid].opad[7] = opad[7]; - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - u32 dgst[8]; - - hmac_sha256_run (salt_buf0, salt_buf1, salt_buf2, salt_buf3, ipad, opad, dgst); - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = dgst[0]; - tmps[gid].out[i + 1] = dgst[1]; - tmps[gid].out[i + 2] = dgst[2]; - tmps[gid].out[i + 3] = dgst[3]; - tmps[gid].out[i + 4] = dgst[4]; - tmps[gid].out[i + 5] = dgst[5]; - tmps[gid].out[i + 6] = dgst[6]; - tmps[gid].out[i + 7] = dgst[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12800_loop (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 
*bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 ipad[8]; - - ipad[0] = tmps[gid].ipad[0]; - ipad[1] = tmps[gid].ipad[1]; - ipad[2] = tmps[gid].ipad[2]; - ipad[3] = tmps[gid].ipad[3]; - ipad[4] = tmps[gid].ipad[4]; - ipad[5] = tmps[gid].ipad[5]; - ipad[6] = tmps[gid].ipad[6]; - ipad[7] = tmps[gid].ipad[7]; - - u32 opad[8]; - - opad[0] = tmps[gid].opad[0]; - opad[1] = tmps[gid].opad[1]; - opad[2] = tmps[gid].opad[2]; - opad[3] = tmps[gid].opad[3]; - opad[4] = tmps[gid].opad[4]; - opad[5] = tmps[gid].opad[5]; - opad[6] = tmps[gid].opad[6]; - opad[7] = tmps[gid].opad[7]; - - for (u32 i = 0; i < 8; i += 8) - { - u32 dgst[8]; - - dgst[0] = tmps[gid].dgst[i + 0]; - dgst[1] = tmps[gid].dgst[i + 1]; - dgst[2] = tmps[gid].dgst[i + 2]; - dgst[3] = tmps[gid].dgst[i + 3]; - dgst[4] = tmps[gid].dgst[i + 4]; - dgst[5] = tmps[gid].dgst[i + 5]; - dgst[6] = tmps[gid].dgst[i + 6]; - dgst[7] = tmps[gid].dgst[i + 7]; - - u32 out[8]; - - out[0] = tmps[gid].out[i + 0]; - out[1] = tmps[gid].out[i + 1]; - out[2] = tmps[gid].out[i + 2]; - out[3] = tmps[gid].out[i + 3]; - out[4] = tmps[gid].out[i + 4]; - out[5] = tmps[gid].out[i + 5]; - out[6] = tmps[gid].out[i + 6]; - out[7] = tmps[gid].out[i + 7]; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 
0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - tmps[gid].dgst[i + 0] = dgst[0]; - tmps[gid].dgst[i + 1] = dgst[1]; - tmps[gid].dgst[i + 2] = dgst[2]; - tmps[gid].dgst[i + 3] = dgst[3]; - tmps[gid].dgst[i + 4] = dgst[4]; - tmps[gid].dgst[i + 5] = dgst[5]; - tmps[gid].dgst[i + 6] = dgst[6]; - tmps[gid].dgst[i + 7] = dgst[7]; - - tmps[gid].out[i + 0] = out[0]; - tmps[gid].out[i + 1] = out[1]; - tmps[gid].out[i + 2] = out[2]; - tmps[gid].out[i + 3] = out[3]; - tmps[gid].out[i + 4] = out[4]; - tmps[gid].out[i + 5] = out[5]; - tmps[gid].out[i + 6] = out[6]; - tmps[gid].out[i + 7] = out[7]; - } -} - -extern "C" __global__ void __launch_bounds__ (256, 1) m12800_comp (const pw_t *pws, const gpu_rule_t *rules_buf, const comb_t *combs_buf, const bf_t *bfs_buf, pbkdf2_sha256_tmp_t *tmps, void *hooks, const u32 *bitmaps_buf_s1_a, const u32 *bitmaps_buf_s1_b, const u32 *bitmaps_buf_s1_c, const u32 *bitmaps_buf_s1_d, const u32 *bitmaps_buf_s2_a, const u32 *bitmaps_buf_s2_b, const u32 *bitmaps_buf_s2_c, const u32 *bitmaps_buf_s2_d, plain_t *plains_buf, const digest_t *digests_buf, u32 *hashes_shown, const salt_t *salt_bufs, const pbkdf2_sha256_t *esalt_bufs, u32 *d_return_buf, u32 *d_scryptV_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 rules_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) -{ - /** - * base - */ - - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 lid = threadIdx.x; - - const u32x r0 = tmps[gid].out[DGST_R0]; - const u32x r1 = tmps[gid].out[DGST_R1]; - const u32x r2 = tmps[gid].out[DGST_R2]; - const 
u32x r3 = tmps[gid].out[DGST_R3]; - - #define il_pos 0 - - #include VECT_COMPARE_M -} diff --git a/nv/markov_be_v1.cu b/nv/markov_be_v1.cu deleted file mode 100644 index ab6f993..0000000 --- a/nv/markov_be_v1.cu +++ /dev/null @@ -1,127 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE1 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << ((3 - jm4) * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << ((3 - jm4) * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); - - pws_buf_l[gid].i[ 0] 
= pw_buf[ 0]; - pws_buf_l[gid].i[ 1] = pw_buf[ 1]; - pws_buf_l[gid].i[ 2] = pw_buf[ 2]; - pws_buf_l[gid].i[ 3] = pw_buf[ 3]; - pws_buf_l[gid].i[ 4] = pw_buf[ 4]; - pws_buf_l[gid].i[ 5] = pw_buf[ 5]; - pws_buf_l[gid].i[ 6] = pw_buf[ 6]; - pws_buf_l[gid].i[ 7] = pw_buf[ 7]; - pws_buf_l[gid].i[ 8] = pw_buf[ 8]; - pws_buf_l[gid].i[ 9] = pw_buf[ 9]; - pws_buf_l[gid].i[10] = pw_buf[10]; - pws_buf_l[gid].i[11] = pw_buf[11]; - pws_buf_l[gid].i[12] = pw_buf[12]; - pws_buf_l[gid].i[13] = pw_buf[13]; - pws_buf_l[gid].i[14] = pw_buf[14]; - pws_buf_l[gid].i[15] = pw_buf[15]; - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); - - pws_buf_r[gid].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); - - pws_buf[gid].i[ 0] = pw_buf[ 0]; - pws_buf[gid].i[ 1] = pw_buf[ 1]; - pws_buf[gid].i[ 2] = pw_buf[ 2]; - pws_buf[gid].i[ 3] = pw_buf[ 3]; - pws_buf[gid].i[ 4] = pw_buf[ 4]; - pws_buf[gid].i[ 5] = pw_buf[ 5]; - pws_buf[gid].i[ 6] = pw_buf[ 6]; - pws_buf[gid].i[ 7] = pw_buf[ 7]; - - pws_buf[gid].pw_len = pw_len; -} diff --git a/nv/markov_be_v2.cu b/nv/markov_be_v2.cu deleted file mode 100644 index 892306e..0000000 --- a/nv/markov_be_v2.cu +++ 
/dev/null @@ -1,142 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE2 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << ((3 - jm4) * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << ((3 - jm4) * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 1); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - 
pws_buf_l[gid].i[i].x = pw_buf0[i]; - pws_buf_l[gid].i[i].y = pw_buf1[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 0); - - pws_buf_r[gid2 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 1); - - pws_buf_r[gid2 + 1].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 0); - - pws_buf[gid2 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 1); - - pws_buf[gid2 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 1].i[ 6] = 
pw_buf[ 6]; - pws_buf[gid2 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 1].pw_len = pw_len; -} diff --git a/nv/markov_be_v4.cu b/nv/markov_be_v4.cu deleted file mode 100644 index 13cee2a..0000000 --- a/nv/markov_be_v4.cu +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE4 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << ((3 - jm4) * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << ((3 - jm4) * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - u32 pw_buf2[16]; - u32 pw_buf3[16]; - - generate_pw (pw_buf0, root_css_buf, 
markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 1); - generate_pw (pw_buf2, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 2); - generate_pw (pw_buf3, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 3); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].x = pw_buf0[i]; - pws_buf_l[gid].i[i].y = pw_buf1[i]; - pws_buf_l[gid].i[i].z = pw_buf2[i]; - pws_buf_l[gid].i[i].w = pw_buf3[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 0); - - pws_buf_r[gid4 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 1); - - pws_buf_r[gid4 + 1].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 2); - - pws_buf_r[gid4 + 2].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 3); - - pws_buf_r[gid4 + 3].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw 
(pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 0); - - pws_buf[gid4 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 1); - - pws_buf[gid4 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 1].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 2); - - pws_buf[gid4 + 2].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 2].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 2].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 2].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 2].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 2].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 2].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 2].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 2].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 3); - - pws_buf[gid4 + 3].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 3].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 3].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 3].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 3].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 3].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 3].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 3].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 3].pw_len = pw_len; -} diff --git a/nv/markov_le_v1.cu b/nv/markov_le_v1.cu deleted file mode 100644 index c090bb2..0000000 --- a/nv/markov_le_v1.cu +++ /dev/null 
@@ -1,127 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE1 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << (jm4 * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid); - - pws_buf_l[gid].i[ 0] = pw_buf[ 0]; - pws_buf_l[gid].i[ 1] = pw_buf[ 1]; - pws_buf_l[gid].i[ 2] = pw_buf[ 2]; - pws_buf_l[gid].i[ 3] = pw_buf[ 3]; - pws_buf_l[gid].i[ 4] = pw_buf[ 4]; - pws_buf_l[gid].i[ 5] = pw_buf[ 5]; - pws_buf_l[gid].i[ 6] = pw_buf[ 6]; - 
pws_buf_l[gid].i[ 7] = pw_buf[ 7]; - pws_buf_l[gid].i[ 8] = pw_buf[ 8]; - pws_buf_l[gid].i[ 9] = pw_buf[ 9]; - pws_buf_l[gid].i[10] = pw_buf[10]; - pws_buf_l[gid].i[11] = pw_buf[11]; - pws_buf_l[gid].i[12] = pw_buf[12]; - pws_buf_l[gid].i[13] = pw_buf[13]; - pws_buf_l[gid].i[14] = pw_buf[14]; - pws_buf_l[gid].i[15] = pw_buf[15]; - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid); - - pws_buf_r[gid].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid); - - pws_buf[gid].i[ 0] = pw_buf[ 0]; - pws_buf[gid].i[ 1] = pw_buf[ 1]; - pws_buf[gid].i[ 2] = pw_buf[ 2]; - pws_buf[gid].i[ 3] = pw_buf[ 3]; - pws_buf[gid].i[ 4] = pw_buf[ 4]; - pws_buf[gid].i[ 5] = pw_buf[ 5]; - pws_buf[gid].i[ 6] = pw_buf[ 6]; - pws_buf[gid].i[ 7] = pw_buf[ 7]; - - pws_buf[gid].pw_len = pw_len; -} diff --git a/nv/markov_le_v2.cu b/nv/markov_le_v2.cu deleted file mode 100644 index 23503c9..0000000 --- a/nv/markov_le_v2.cu +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE2 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t 
*markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << (jm4 * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid2 + 1); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].x = pw_buf0[i]; - pws_buf_l[gid].i[i].y = pw_buf1[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const 
u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 0); - - pws_buf_r[gid2 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid2 + 1); - - pws_buf_r[gid2 + 1].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid2 = gid * 2; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 0); - - pws_buf[gid2 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 0].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid2 + 1); - - pws_buf[gid2 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid2 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid2 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid2 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid2 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid2 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid2 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid2 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid2 + 1].pw_len = pw_len; -} diff --git a/nv/markov_le_v4.cu b/nv/markov_le_v4.cu deleted file mode 100644 index d41244a..0000000 --- a/nv/markov_le_v4.cu +++ /dev/null @@ -1,182 +0,0 @@ -/** - * 
Author......: Jens Steube - * License.....: MIT - */ - -#define CHARSIZ 256 - -#define VECT_SIZE4 - -#include "types_nv.c" - -__device__ static void generate_pw (u32 pw_buf[16], cs_t *root_css_buf, cs_t *markov_css_buf, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, u64 val) -{ - pw_buf[ 0] = 0; - pw_buf[ 1] = 0; - pw_buf[ 2] = 0; - pw_buf[ 3] = 0; - pw_buf[ 4] = 0; - pw_buf[ 5] = 0; - pw_buf[ 6] = 0; - pw_buf[ 7] = 0; - pw_buf[ 8] = 0; - pw_buf[ 9] = 0; - pw_buf[10] = 0; - pw_buf[11] = 0; - pw_buf[12] = 0; - pw_buf[13] = 0; - pw_buf[14] = 0; - pw_buf[15] = 0; - - cs_t *cs = &root_css_buf[pw_r_len]; - - u32 i; - u32 j; - - for (i = 0, j = pw_r_len; i < pw_l_len; i++, j++) - { - const u32 len = cs->cs_len; - - const u64 next = val / len; - const u64 pos = val % len; - - val = next; - - const u32 key = cs->cs_buf[pos]; - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= key << (jm4 * 8); - - cs = &markov_css_buf[(j * CHARSIZ) + key]; - } - - const u32 jd4 = j / 4; - const u32 jm4 = j % 4; - - pw_buf[jd4] |= (0xff << (jm4 * 8)) & mask80; - - if (bits14) pw_buf[14] = (pw_l_len + pw_r_len) * 8; - if (bits15) pw_buf[15] = (pw_l_len + pw_r_len) * 8; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) l_markov (pw_t *pws_buf_l, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_l_len, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf0[16]; - u32 pw_buf1[16]; - u32 pw_buf2[16]; - u32 pw_buf3[16]; - - generate_pw (pw_buf0, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 0); - generate_pw (pw_buf1, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 1); - generate_pw (pw_buf2, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, 
bits14, bits15, off + gid4 + 2); - generate_pw (pw_buf3, root_css_buf, markov_css_buf, pw_l_len, pw_r_len, mask80, bits14, bits15, off + gid4 + 3); - - #pragma unroll 16 - for (int i = 0; i < 16; i++) - { - pws_buf_l[gid].i[i].x = pw_buf0[i]; - pws_buf_l[gid].i[i].y = pw_buf1[i]; - pws_buf_l[gid].i[i].z = pw_buf2[i]; - pws_buf_l[gid].i[i].w = pw_buf3[i]; - } - - pws_buf_l[gid].pw_len = pw_l_len + pw_r_len; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) r_markov (bf_t *pws_buf_r, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_r_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 0); - - pws_buf_r[gid4 + 0].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 1); - - pws_buf_r[gid4 + 1].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 2); - - pws_buf_r[gid4 + 2].i = pw_buf[0]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_r_len, 0, 0, 0, 0, off + gid4 + 3); - - pws_buf_r[gid4 + 3].i = pw_buf[0]; -} - -extern "C" __global__ void __launch_bounds__ (256, 1) C_markov (comb_t *pws_buf, cs_t *root_css_buf, cs_t *markov_css_buf, const u64 off, const u32 pw_len, const u32 mask80, const u32 bits14, const u32 bits15, const u32 gid_max) -{ - const u32 gid = (blockIdx.x * blockDim.x) + threadIdx.x; - - if (gid >= gid_max) return; - - const u32 gid4 = gid * 4; - - u32 pw_buf[16]; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 0); - - pws_buf[gid4 + 0].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 0].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 0].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 0].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 0].i[ 4] = 
pw_buf[ 4]; - pws_buf[gid4 + 0].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 0].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 0].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 0].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 1); - - pws_buf[gid4 + 1].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 1].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 1].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 1].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 1].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 1].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 1].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 1].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 1].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 2); - - pws_buf[gid4 + 2].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 2].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 2].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 2].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 2].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 2].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 2].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 2].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 2].pw_len = pw_len; - - generate_pw (pw_buf, root_css_buf, markov_css_buf, pw_len, 0, mask80, bits14, bits15, off + gid4 + 3); - - pws_buf[gid4 + 3].i[ 0] = pw_buf[ 0]; - pws_buf[gid4 + 3].i[ 1] = pw_buf[ 1]; - pws_buf[gid4 + 3].i[ 2] = pw_buf[ 2]; - pws_buf[gid4 + 3].i[ 3] = pw_buf[ 3]; - pws_buf[gid4 + 3].i[ 4] = pw_buf[ 4]; - pws_buf[gid4 + 3].i[ 5] = pw_buf[ 5]; - pws_buf[gid4 + 3].i[ 6] = pw_buf[ 6]; - pws_buf[gid4 + 3].i[ 7] = pw_buf[ 7]; - - pws_buf[gid4 + 3].pw_len = pw_len; -} diff --git a/nv/types_nv.c b/nv/types_nv.c deleted file mode 100644 index 02c7364..0000000 --- a/nv/types_nv.c +++ /dev/null @@ -1,1402 +0,0 @@ -/** - * Author......: Jens Steube - * License.....: MIT - */ - -#include - -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; - -__device__ static u32 lut3_2d (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, 
%2, %3, 0x2d;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_39 (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0x39;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_59 (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0x59;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_96 (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_e4 (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0xe4;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_e8 (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0xe8;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -__device__ static u32 lut3_ca (const u32 a, const u32 b, const u32 c) -{ - u32 r; - - asm ("lop3.b32 %0, %1, %2, %3, 0xca;" : "=r" (r) : "r" (a), "r" (b), "r" (c)); - - return r; -} - -#if __CUDA_ARCH__ >= 350 - -__device__ static u32 rotr32 (const u32 a, const u32 n) -{ - return __funnelshift_r (a, a, n); -} - -__device__ static u32 rotl32 (const u32 a, const u32 n) -{ - return rotr32 (a, 32 - n); -} - -__device__ static u64 rotr64 (const u64 a, const u32 n) -{ - u32 il; - u32 ir; - - asm ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(a)); - - u32 tl; - u32 tr; - - if (n >= 32) - { - asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(ir), "r"(il), "r"(n - 32)); - asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(il), "r"(ir), "r"(n - 32)); - } - else - { - asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tl) : "r"(il), "r"(ir), "r"(n)); - asm ("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(tr) : "r"(ir), "r"(il), "r"(n)); - } - - u64 r; - - asm ("mov.b64 %0, {%1, %2};" : "=l"(r) 
: "r"(tl), "r"(tr)); - - return r; -} - -__device__ static u64 rotl64 (const u64 a, const u32 n) -{ - return rotr64 (a, 64 - n); -} - -#else - -__device__ static u32 rotr32 (const u32 a, const u32 n) -{ - return (((a) >> (n)) + ((a) << (32 - (n)))); -} - -__device__ static u32 rotl32 (const u32 a, const u32 n) -{ - return rotr32 (a, 32 - n); -} - -__device__ static u64 rotr64 (const u64 a, const u32 n) -{ - return (((a) >> (n)) + ((a) << (64 - (n)))); -} - -__device__ static u64 rotl64 (const u64 a, const u32 n) -{ - return rotr64 (a, 64 - n); -} - - -#endif - -#ifdef VECT_SIZE1 -#define VECT_SHIFT 0 -#define VECT_DIV 1 - -typedef u8 u8x; -typedef u16 u16x; -typedef u32 u32x; -typedef u64 u64x; - -__device__ static u32 l32_from_64 (u64 a) -{ - const u32 r = (u32) a; - - return r; -} - -__device__ static u32 h32_from_64 (u64 a) -{ - a >>= 32; - - const u32 r = (u32) a; - - return r; -} - -__device__ static u64 hl32_to_64 (const u32x a, const u32x b) -{ - u64 r; - - asm ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(b), "r"(a)); - - return r; -} - -#endif - -#ifdef VECT_SIZE2 -#define VECT_SHIFT 1 -#define VECT_DIV 2 - -class u8x -{ - private: - public: - - u8 x; - u8 y; - - inline __device__ u8x (const u8 a, const u8 b) : x(a), y(b) { } - inline __device__ u8x (const u8 a) : x(a), y(a) { } - - inline __device__ u8x (void) { } - inline __device__ ~u8x (void) { } -}; - -class u16x -{ - private: - public: - - u16 x; - u16 y; - - inline __device__ u16x (const u16 a, const u16 b) : x(a), y(b) { } - inline __device__ u16x (const u16 a) : x(a), y(a) { } - - inline __device__ u16x (void) { } - inline __device__ ~u16x (void) { } -}; - -class u32x -{ - private: - public: - - u32 x; - u32 y; - - inline __device__ u32x (const u32 a, const u32 b) : x(a), y(b) { } - inline __device__ u32x (const u32 a) : x(a), y(a) { } - - inline __device__ u32x (void) { } - inline __device__ ~u32x (void) { } -}; - -class u64x -{ - private: - public: - - u64 x; - u64 y; - - inline __device__ u64x 
(const u32x a) : x(a.x), y(a.y) { } - - inline __device__ u64x (const u64 a, const u64 b) : x(a), y(b) { } - inline __device__ u64x (const u64 a) : x(a), y(a) { } - - inline __device__ u64x (void) { } - inline __device__ ~u64x (void) { } -}; - -inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.x != b ) && (a.y != b )); } -inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.x != b.x) && (a.y != b.y)); } - -inline __device__ void operator ^= (u32x &a, const u32 b) { a.x ^= b; a.y ^= b; } -inline __device__ void operator ^= (u32x &a, const u32x b) { a.x ^= b.x; a.y ^= b.y; } - -inline __device__ void operator |= (u32x &a, const u32 b) { a.x |= b; a.y |= b; } -inline __device__ void operator |= (u32x &a, const u32x b) { a.x |= b.x; a.y |= b.y; } - -inline __device__ void operator &= (u32x &a, const u32 b) { a.x &= b; a.y &= b; } -inline __device__ void operator &= (u32x &a, const u32x b) { a.x &= b.x; a.y &= b.y; } - -inline __device__ void operator += (u32x &a, const u32 b) { a.x += b; a.y += b; } -inline __device__ void operator += (u32x &a, const u32x b) { a.x += b.x; a.y += b.y; } - -inline __device__ void operator -= (u32x &a, const u32 b) { a.x -= b; a.y -= b; } -inline __device__ void operator -= (u32x &a, const u32x b) { a.x -= b.x; a.y -= b.y; } - -inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.x << b ), (a.y << b )); } -inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.x << b.x), (a.y << b.y)); } - -inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.x >> b ), (a.y >> b )); } -inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.x >> b.x), (a.y >> b.y)); } - -inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.x ^ b ), (a.y ^ b )); } -inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.x ^ b.x), (a.y ^ b.y)); } - -inline __device__ 
u32x operator | (const u32x a, const u32 b) { return u32x ((a.x | b ), (a.y | b )); } -inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.x | b.x), (a.y | b.y)); } - -inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.x & b ), (a.y & b )); } -inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.x & b.x), (a.y & b.y)); } - -inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.x + b ), (a.y + b )); } -inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.x + b.x), (a.y + b.y)); } - -inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.x - b ), (a.y - b )); } -inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.x - b.x), (a.y - b.y)); } - -inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.x * b ), (a.y * b )); } -inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.x * b.x), (a.y * b.y)); } - -inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.x, ~a.y); } - -inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.x != b ) && (a.y != b )); } -inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.x != b.x) && (a.y != b.y)); } - -inline __device__ void operator ^= (u64x &a, const u64 b) { a.x ^= b; a.y ^= b; } -inline __device__ void operator ^= (u64x &a, const u64x b) { a.x ^= b.x; a.y ^= b.y; } - -inline __device__ void operator |= (u64x &a, const u64 b) { a.x |= b; a.y |= b; } -inline __device__ void operator |= (u64x &a, const u64x b) { a.x |= b.x; a.y |= b.y; } - -inline __device__ void operator &= (u64x &a, const u64 b) { a.x &= b; a.y &= b; } -inline __device__ void operator &= (u64x &a, const u64x b) { a.x &= b.x; a.y &= b.y; } - -inline __device__ void operator += (u64x &a, const u64 b) { a.x += b; a.y += b; } -inline __device__ void operator += (u64x 
&a, const u64x b) { a.x += b.x; a.y += b.y; } - -inline __device__ void operator -= (u64x &a, const u64 b) { a.x -= b; a.y -= b; } -inline __device__ void operator -= (u64x &a, const u64x b) { a.x -= b.x; a.y -= b.y; } - -inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.x << b ), (a.y << b )); } -inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.x << b.x), (a.y << b.y)); } - -inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.x >> b ), (a.y >> b )); } -inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.x >> b.x), (a.y >> b.y)); } - -inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.x ^ b ), (a.y ^ b )); } -inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.x ^ b.x), (a.y ^ b.y)); } - -inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.x | b ), (a.y | b )); } -inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.x | b.x), (a.y | b.y)); } - -inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.x & b ), (a.y & b )); } -inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.x & b.x), (a.y & b.y)); } - -inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.x + b ), (a.y + b )); } -inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.x + b.x), (a.y + b.y)); } - -inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.x - b ), (a.y - b )); } -inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.x - b.x), (a.y - b.y)); } - -inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.x, ~a.y); } - -__device__ static u32x lut3_2d (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_2d (a.x, b.x, c.x), - lut3_2d (a.y, b.y, c.y)); -} - -__device__ static u32x 
lut3_39 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_39 (a.x, b.x, c.x), - lut3_39 (a.y, b.y, c.y)); -} - -__device__ static u32x lut3_59 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_59 (a.x, b.x, c.x), - lut3_59 (a.y, b.y, c.y)); -} - -__device__ static u32x lut3_96 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_96 (a.x, b.x, c.x), - lut3_96 (a.y, b.y, c.y)); -} - -__device__ static u32x lut3_e4 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_e4 (a.x, b.x, c.x), - lut3_e4 (a.y, b.y, c.y)); -} - -__device__ static u32x lut3_e8 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_e8 (a.x, b.x, c.x), - lut3_e8 (a.y, b.y, c.y)); -} - -__device__ static u32x lut3_ca (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_ca (a.x, b.x, c.x), - lut3_ca (a.y, b.y, c.y)); -} - -__device__ static u32x rotl32(const u32x a, const u32 n) -{ - return u32x (rotl32 (a.x, n), - rotl32 (a.y, n)); -} - -__device__ static u32x rotr32(const u32x a, const u32 n) -{ - return u32x (rotr32 (a.x, n), - rotr32 (a.y, n)); -} - -__device__ static u64x rotl64(const u64x a, const u32 n) -{ - return u64x (rotl64 (a.x, n), - rotl64 (a.y, n)); -} - -__device__ static u64x rotr64(const u64x a, const u32 n) -{ - return u64x (rotr64 (a.x, n), - rotr64 (a.y, n)); -} - -__device__ static u32x __byte_perm (const u32x a, const u32x b, const u32 c) -{ - return u32x (__byte_perm (a.x, b.x, c), - __byte_perm (a.y, b.y, c)); -} - -#endif - -#ifdef VECT_SIZE4 -#define VECT_SHIFT 2 -#define VECT_DIV 4 - -class u8x -{ - private: - public: - - u8 x; - u8 y; - u8 z; - u8 w; - - inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d) : x(a), y(b), z(c), w(d) { } - inline __device__ u8x (const u8 a) : x(a), y(a), z(a), w(a) { } - - inline __device__ u8x (void) { } - inline __device__ ~u8x (void) { } -}; - -class u16x -{ - private: - public: - - u16 x; - u16 y; - u16 z; - u16 w; - - inline __device__ 
u16x (const u16 a, const u16 b, const u16 c, const u16 d) : x(a), y(b), z(c), w(d) { } - inline __device__ u16x (const u16 a) : x(a), y(a), z(a), w(a) { } - - inline __device__ u16x (void) { } - inline __device__ ~u16x (void) { } -}; - -class u32x -{ - private: - public: - - u32 x; - u32 y; - u32 z; - u32 w; - - inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d) : x(a), y(b), z(c), w(d) { } - inline __device__ u32x (const u32 a) : x(a), y(a), z(a), w(a) { } - - inline __device__ u32x (void) { } - inline __device__ ~u32x (void) { } -}; - -class u64x -{ - private: - public: - - u64 x; - u64 y; - u64 z; - u64 w; - - inline __device__ u64x (const u32x a) : x(a.x), y(a.y), z(a.z), w(a.w) { } - - inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d) : x(a), y(b), z(c), w(d) { } - inline __device__ u64x (const u64 a) : x(a), y(a), z(a), w(a) { } - - inline __device__ u64x (void) { } - inline __device__ ~u64x (void) { } -}; - -inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.x != b ) && (a.y != b ) && (a.z != b ) && (a.w != b )); } -inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.x != b.x) && (a.y != b.y) && (a.z != b.z) && (a.w != b.w)); } - -inline __device__ void operator ^= (u32x &a, const u32 b) { a.x ^= b; a.y ^= b; a.z ^= b; a.w ^= b; } -inline __device__ void operator ^= (u32x &a, const u32x b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; } - -inline __device__ void operator |= (u32x &a, const u32 b) { a.x |= b; a.y |= b; a.z |= b; a.w |= b; } -inline __device__ void operator |= (u32x &a, const u32x b) { a.x |= b.x; a.y |= b.y; a.z |= b.z; a.w |= b.w; } - -inline __device__ void operator &= (u32x &a, const u32 b) { a.x &= b; a.y &= b; a.z &= b; a.w &= b; } -inline __device__ void operator &= (u32x &a, const u32x b) { a.x &= b.x; a.y &= b.y; a.z &= b.z; a.w &= b.w; } - -inline __device__ void operator += (u32x &a, const u32 b) { a.x += b; a.y += b; a.z += b; 
a.w += b; } -inline __device__ void operator += (u32x &a, const u32x b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; } - -inline __device__ void operator -= (u32x &a, const u32 b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; } -inline __device__ void operator -= (u32x &a, const u32x b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; } - -inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } -inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.x << b.x), (a.y << b.y), (a.z << b.z), (a.w << b.w)); } - -inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); } -inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.x >> b.x), (a.y >> b.y), (a.z >> b.z), (a.w >> b.w)); } - -inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.x ^ b ), (a.y ^ b ), (a.z ^ b ), (a.w ^ b )); } -inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); } - -inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.x | b ), (a.y | b ), (a.z | b ), (a.w | b )); } -inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); } - -inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } -inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.x & b.x), (a.y & b.y), (a.z & b.z), (a.w & b.w)); } - -inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.x + b ), (a.y + b ), (a.z + b ), (a.w + b )); } -inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); } - -inline __device__ u32x operator - (const u32x 
a, const u32 b) { return u32x ((a.x - b ), (a.y - b ), (a.z - b ), (a.w - b )); } -inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.x - b.x), (a.y - b.y), (a.z - b.z), (a.w - b.w)); } - -inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.x * b ), (a.y * b ), (a.z * b ), (a.w * b )); } -inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.x * b.x), (a.y * b.y), (a.z * b.z), (a.w * b.w)); } - -inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.x, ~a.y, ~a.z, ~a.w); } - -inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.x != b ) && (a.y != b ) && (a.z != b ) && (a.w != b )); } -inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.x != b.x) && (a.y != b.y) && (a.z != b.z) && (a.w != b.w)); } - -inline __device__ void operator ^= (u64x &a, const u64 b) { a.x ^= b; a.y ^= b; a.z ^= b; a.w ^= b; } -inline __device__ void operator ^= (u64x &a, const u64x b) { a.x ^= b.x; a.y ^= b.y; a.z ^= b.z; a.w ^= b.w; } - -inline __device__ void operator |= (u64x &a, const u64 b) { a.x |= b; a.y |= b; a.z |= b; a.w |= b; } -inline __device__ void operator |= (u64x &a, const u64x b) { a.x |= b.x; a.y |= b.y; a.z |= b.z; a.w |= b.w; } - -inline __device__ void operator &= (u64x &a, const u64 b) { a.x &= b; a.y &= b; a.z &= b; a.w &= b; } -inline __device__ void operator &= (u64x &a, const u64x b) { a.x &= b.x; a.y &= b.y; a.z &= b.z; a.w &= b.w; } - -inline __device__ void operator += (u64x &a, const u64 b) { a.x += b; a.y += b; a.z += b; a.w += b; } -inline __device__ void operator += (u64x &a, const u64x b) { a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; } - -inline __device__ void operator -= (u64x &a, const u64 b) { a.x -= b; a.y -= b; a.z -= b; a.w -= b; } -inline __device__ void operator -= (u64x &a, const u64x b) { a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; } - -inline __device__ u64x operator << (const u64x a, const u64 
b) { return u64x ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } -inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.x << b.x), (a.y << b.y), (a.z << b.z), (a.w << b.w)); } - -inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.x >> b ), (a.y >> b ), (a.z >> b ), (a.w >> b )); } -inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.x >> b.x), (a.y >> b.y), (a.z >> b.z), (a.w >> b.w)); } - -inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.x ^ b ), (a.y ^ b ), (a.z ^ b ), (a.w ^ b )); } -inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.x ^ b.x), (a.y ^ b.y), (a.z ^ b.z), (a.w ^ b.w)); } - -inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.x | b ), (a.y | b ), (a.z | b ), (a.w | b )); } -inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.x | b.x), (a.y | b.y), (a.z | b.z), (a.w | b.w)); } - -inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } -inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.x & b.x), (a.y & b.y), (a.z & b.z), (a.w & b.w)); } - -inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.x + b ), (a.y + b ), (a.z + b ), (a.w + b )); } -inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.x + b.x), (a.y + b.y), (a.z + b.z), (a.w + b.w)); } - -inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.x - b ), (a.y - b ), (a.z - b ), (a.w - b )); } -inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.x - b.x), (a.y - b.y), (a.z - b.z), (a.w - b.w)); } - -inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.x * b ), (a.y * b ), (a.z * b ), (a.w * b )); } -inline __device__ u64x operator * (const u64x a, const 
u64x b) { return u64x ((a.x * b.x), (a.y * b.y), (a.z * b.z), (a.w * b.w)); } - -inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.x, ~a.y, ~a.z, ~a.w); } - -__device__ static u32x lut3_2d (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_2d(a.x, b.x, c.x), - lut3_2d (a.y, b.y, c.y), - lut3_2d (a.z, b.z, c.z), - lut3_2d (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_39 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_39 (a.x, b.x, c.x), - lut3_39 (a.y, b.y, c.y), - lut3_39 (a.z, b.z, c.z), - lut3_39 (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_59 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_59 (a.x, b.x, c.x), - lut3_59 (a.y, b.y, c.y), - lut3_59 (a.z, b.z, c.z), - lut3_59 (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_96 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_96 (a.x, b.x, c.x), - lut3_96 (a.y, b.y, c.y), - lut3_96 (a.z, b.z, c.z), - lut3_96 (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_e4 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_e4 (a.x, b.x, c.x), - lut3_e4 (a.y, b.y, c.y), - lut3_e4 (a.z, b.z, c.z), - lut3_e4 (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_e8 (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_e8 (a.x, b.x, c.x), - lut3_e8 (a.y, b.y, c.y), - lut3_e8 (a.z, b.z, c.z), - lut3_e8 (a.w, b.w, c.w)); -} - -__device__ static u32x lut3_ca (const u32x a, const u32x b, const u32x c) -{ - return u32x (lut3_ca (a.x, b.x, c.x), - lut3_ca (a.y, b.y, c.y), - lut3_ca (a.z, b.z, c.z), - lut3_ca (a.w, b.w, c.w)); -} - -__device__ static u32x rotl32(const u32x a, const u32 n) -{ - return u32x (rotl32 (a.x, n), - rotl32 (a.y, n), - rotl32 (a.z, n), - rotl32 (a.w, n)); -} - -__device__ static u32x rotr32(const u32x a, const u32 n) -{ - return u32x (rotr32 (a.x, n), - rotr32 (a.y, n), - rotr32 (a.z, n), - rotr32 (a.w, n)); -} - -__device__ static u64x rotl64(const u64x a, const u32 n) -{ - return u64x 
(rotl64 (a.x, n), - rotl64 (a.y, n), - rotl64 (a.z, n), - rotl64 (a.w, n)); -} - -__device__ static u64x rotr64(const u64x a, const u32 n) -{ - return u64x (rotr64 (a.x, n), - rotr64 (a.y, n), - rotr64 (a.z, n), - rotr64 (a.w, n)); -} - -__device__ static u32x __byte_perm (const u32x a, const u32x b, const u32 c) -{ - return u32x (__byte_perm (a.x, b.x, c), - __byte_perm (a.y, b.y, c), - __byte_perm (a.z, b.z, c), - __byte_perm (a.w, b.w, c)); -} - -#endif - -typedef struct -{ - #if defined _DES_ - u32 digest_buf[4]; - #elif defined _MD4_ - u32 digest_buf[4]; - #elif defined _MD5_ - u32 digest_buf[4]; - #elif defined _MD5H_ - u32 digest_buf[4]; - #elif defined _SHA1_ - u32 digest_buf[5]; - #elif defined _BCRYPT_ - u32 digest_buf[6]; - #elif defined _SHA256_ - u32 digest_buf[8]; - #elif defined _SHA384_ - u32 digest_buf[16]; - #elif defined _SHA512_ - u32 digest_buf[16]; - #elif defined _KECCAK_ - u32 digest_buf[50]; - #elif defined _RIPEMD160_ - u32 digest_buf[5]; - #elif defined _WHIRLPOOL_ - u32 digest_buf[16]; - #elif defined _GOST_ - u32 digest_buf[8]; - #elif defined _GOST2012_256_ - u32 digest_buf[8]; - #elif defined _GOST2012_512_ - u32 digest_buf[16]; - #elif defined _SAPB_ - u32 digest_buf[4]; - #elif defined _SAPG_ - u32 digest_buf[5]; - #elif defined _MYSQL323_ - u32 digest_buf[4]; - #elif defined _LOTUS5_ - u32 digest_buf[4]; - #elif defined _LOTUS6_ - u32 digest_buf[4]; - #elif defined _SCRYPT_ - u32 digest_buf[8]; - #elif defined _LOTUS8_ - u32 digest_buf[4]; - #elif defined _OFFICE2007_ - u32 digest_buf[4]; - #elif defined _OFFICE2010_ - u32 digest_buf[4]; - #elif defined _OFFICE2013_ - u32 digest_buf[4]; - #elif defined _OLDOFFICE01_ - u32 digest_buf[4]; - #elif defined _OLDOFFICE34_ - u32 digest_buf[4]; - #elif defined _SIPHASH_ - u32 digest_buf[4]; - #elif defined _PBKDF2_MD5_ - u32 digest_buf[32]; - #elif defined _PBKDF2_SHA1_ - u32 digest_buf[32]; - #elif defined _PBKDF2_SHA256_ - u32 digest_buf[32]; - #elif defined _PBKDF2_SHA512_ - u32 
digest_buf[32]; - #elif defined _PDF17L8_ - u32 digest_buf[8]; - #elif defined _CRC32_ - u32 digest_buf[4]; - #elif defined _SEVEN_ZIP_ - u32 digest_buf[4]; - #elif defined _ANDROIDFDE_ - u32 digest_buf[4]; - #elif defined _DCC2_ - u32 digest_buf[4]; - #elif defined _WPA_ - u32 digest_buf[4]; - #elif defined _MD5_SHA1_ - u32 digest_buf[4]; - #elif defined _SHA1_MD5_ - u32 digest_buf[5]; - #elif defined _NETNTLMV2_ - u32 digest_buf[4]; - #elif defined _KRB5PA_ - u32 digest_buf[4]; - #elif defined _CLOUDKEY_ - u32 digest_buf[8]; - #elif defined _SCRYPT_ - u32 digest_buf[4]; - #elif defined _PSAFE2_ - u32 digest_buf[5]; - #elif defined _LOTUS8_ - u32 digest_buf[4]; - #elif defined _RAR3_ - u32 digest_buf[4]; - #elif defined _SHA256_SHA1_ - u32 digest_buf[8]; - #elif defined _MS_DRSR_ - u32 digest_buf[8]; - #endif - -} digest_t; - -typedef struct -{ - u32 salt_buf[16]; - u32 salt_buf_pc[8]; - - u32 salt_len; - u32 salt_iter; - u32 salt_sign[2]; - - u32 keccak_mdlen; - u32 truecrypt_mdlen; - - u32 digests_cnt; - u32 digests_done; - - u32 digests_offset; - - u32 scrypt_N; - u32 scrypt_r; - u32 scrypt_p; - u32 scrypt_tmto; - u32 scrypt_phy; - -} salt_t; - -typedef struct -{ - int V; - int R; - int P; - - int enc_md; - - u32 id_buf[8]; - u32 u_buf[32]; - u32 o_buf[32]; - - int id_len; - int o_len; - int u_len; - - u32 rc4key[2]; - u32 rc4data[2]; - -} pdf_t; - -typedef struct -{ - u32 pke[25]; - u32 eapol[64]; - int eapol_size; - int keyver; - -} wpa_t; - -typedef struct -{ - u32 cry_master_buf[64]; - u32 ckey_buf[64]; - u32 public_key_buf[64]; - - u32 cry_master_len; - u32 ckey_len; - u32 public_key_len; - -} bitcoin_wallet_t; - -typedef struct -{ - u32 salt_buf[30]; - u32 salt_len; - - u32 esalt_buf[38]; - u32 esalt_len; - -} sip_t; - -typedef struct -{ - u32 data[384]; - -} androidfde_t; - -typedef struct -{ - u32 nr_buf[16]; - u32 nr_len; - - u32 msg_buf[128]; - u32 msg_len; - -} ikepsk_t; - -typedef struct -{ - u32 user_len; - u32 domain_len; - u32 srvchall_len; - u32 
clichall_len; - - u32 userdomain_buf[64]; - u32 chall_buf[256]; - -} netntlm_t; - -typedef struct -{ - u32 user[16]; - u32 realm[16]; - u32 salt[32]; - u32 timestamp[16]; - u32 checksum[4]; - -} krb5pa_t; - -typedef struct -{ - u32 salt_buf[16]; - u32 data_buf[112]; - u32 keyfile_buf[16]; - -} tc_t; - -typedef struct -{ - u32 salt_buf[16]; - -} pbkdf2_md5_t; - -typedef struct -{ - u32 salt_buf[16]; - -} pbkdf2_sha1_t; - -typedef struct -{ - u32 salt_buf[16]; - -} pbkdf2_sha256_t; - -typedef struct -{ - u32 salt_buf[32]; - -} pbkdf2_sha512_t; - -typedef struct -{ - u32 salt_buf[128]; - u32 salt_len; - -} rakp_t; - -typedef struct -{ - u32 data_len; - u32 data_buf[512]; - -} cloudkey_t; - -typedef struct -{ - u32 encryptedVerifier[4]; - u32 encryptedVerifierHash[5]; - - u32 keySize; - -} office2007_t; - -typedef struct -{ - u32 encryptedVerifier[4]; - u32 encryptedVerifierHash[8]; - -} office2010_t; - -typedef struct -{ - u32 encryptedVerifier[4]; - u32 encryptedVerifierHash[8]; - -} office2013_t; - -typedef struct -{ - u32 version; - u32 encryptedVerifier[4]; - u32 encryptedVerifierHash[4]; - u32 rc4key[2]; - -} oldoffice01_t; - -typedef struct -{ - u32 version; - u32 encryptedVerifier[4]; - u32 encryptedVerifierHash[5]; - u32 rc4key[2]; - -} oldoffice34_t; - -typedef struct -{ - u32x digest[4]; - u32x out[4]; - -} pdf14_tmp_t; - -typedef struct -{ - union - { - u32 dgst32[16]; - u64 dgst64[8]; - }; - - u32 dgst_len; - u32 W_len; - -} pdf17l8_tmp_t; - -typedef struct -{ - u32x digest_buf[4]; - -} phpass_tmp_t; - -typedef struct -{ - u32x digest_buf[4]; - -} md5crypt_tmp_t; - -typedef struct -{ - u32x alt_result[8]; - - u32x p_bytes[4]; - u32x s_bytes[4]; - -} sha256crypt_tmp_t; - -typedef struct -{ - u64x l_alt_result[8]; - - u64x l_p_bytes[2]; - u64x l_s_bytes[2]; - -} sha512crypt_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[10]; - u32x out[10]; - -} wpa_tmp_t; - -typedef struct -{ - u64x dgst[8]; - -} bitcoin_wallet_tmp_t; - -typedef 
struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[5]; - u32x out[4]; - -} dcc2_tmp_t; - -typedef struct -{ - u32x P[18]; - - u32x S0[256]; - u32x S1[256]; - u32x S2[256]; - u32x S3[256]; - -} bcrypt_tmp_t; - -typedef struct -{ - u32x digest[2]; - - u32x P[18]; - - u32x S0[256]; - u32x S1[256]; - u32x S2[256]; - u32x S3[256]; - -} pwsafe2_tmp_t; - -typedef struct -{ - u32x digest_buf[8]; - -} pwsafe3_tmp_t; - -typedef struct -{ - u32x digest_buf[5]; - -} androidpin_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[10]; - u32x out[10]; - -} androidfde_tmp_t; - -typedef struct -{ - u32x ipad[16]; - u32x opad[16]; - - u32x dgst[64]; - u32x out[64]; - -} tc_tmp_t; - -typedef struct -{ - u64x ipad[8]; - u64x opad[8]; - - u64x dgst[32]; - u64x out[32]; - -} tc64_tmp_t; - -typedef struct -{ - u32x ipad[4]; - u32x opad[4]; - - u32x dgst[32]; - u32x out[32]; - -} pbkdf2_md5_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[32]; - u32x out[32]; - -} pbkdf2_sha1_tmp_t; - -typedef struct -{ - u32x ipad[8]; - u32x opad[8]; - - u32x dgst[32]; - u32x out[32]; - -} pbkdf2_sha256_tmp_t; - -typedef struct -{ - u64x ipad[8]; - u64x opad[8]; - - u64x dgst[16]; - u64x out[16]; - -} pbkdf2_sha512_tmp_t; - -typedef struct -{ - u64x out[8]; - -} ecryptfs_tmp_t; - -typedef struct -{ - u64x ipad[8]; - u64x opad[8]; - - u64x dgst[16]; - u64x out[16]; - -} oraclet_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[5]; - u32x out[5]; - -} agilekey_tmp_t; - -typedef struct -{ - u32 ipad[5]; - u32 opad[5]; - - u32 dgst1[5]; - u32 out1[5]; - - u32 dgst2[5]; - u32 out2[5]; - -} mywallet_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[5]; - u32x out[5]; - -} sha1aix_tmp_t; - -typedef struct -{ - u32x ipad[8]; - u32x opad[8]; - - u32x dgst[8]; - u32x out[8]; - -} sha256aix_tmp_t; - -typedef struct -{ - u64x ipad[8]; - u64x opad[8]; - - u64x dgst[8]; - u64x out[8]; - -} sha512aix_tmp_t; - -typedef 
struct -{ - u32x ipad[8]; - u32x opad[8]; - - u32x dgst[8]; - u32x out[8]; - -} lastpass_tmp_t; - -typedef struct -{ - u64x digest_buf[8]; - -} drupal7_tmp_t; - -typedef struct -{ - u32x ipad[5]; - u32x opad[5]; - - u32x dgst[5]; - u32x out[5]; - -} lotus8_tmp_t; - -typedef struct -{ - u32x out[5]; - -} office2007_tmp_t; - -typedef struct -{ - u32x out[5]; - -} office2010_tmp_t; - -typedef struct -{ - u64x out[8]; - -} office2013_tmp_t; - -typedef struct -{ - u32x digest_buf[5]; - -} saph_sha1_tmp_t; - -typedef struct -{ - u32x block[16]; - - u32x dgst[8]; - - u32x block_len; - u32x final_len; - -} seven_zip_tmp_t; - -typedef struct -{ - u32x Kc[16]; - u32x Kd[16]; - - u32x iv[2]; - -} bsdicrypt_tmp_t; - -typedef struct -{ - u32 dgst[17][5]; - -} rar3_tmp_t; - -typedef struct -{ - u32 user[16]; - -} cram_md5_t; - -typedef struct -{ - u32 iv_buf[4]; - u32 iv_len; - - u32 salt_buf[4]; - u32 salt_len; - - u32 crc; - - u32 data_buf[96]; - u32 data_len; - - u32 unpack_size; - -} seven_zip_t; - -typedef struct -{ - u32 key; - u64 val; - -} hcstat_table_t; - -typedef struct -{ - u32 cs_buf[0x100]; - u32 cs_len; - -} cs_t; - -typedef struct -{ - u32 cmds[15]; - -} gpu_rule_t; - -/* -typedef struct -{ - u32 plain_buf[16]; - u32 plailen; - -} plain_t; -*/ - -typedef struct -{ - u32 gidvid; - u32 il_pos; - -} plain_t; - -typedef struct -{ - #ifdef _SCALAR_ - u32 i[64]; - #else - #ifdef VECT_SIZE4 - u32x i[16]; - #endif - - #ifdef VECT_SIZE2 - u32x i[32]; - #endif - - #ifdef VECT_SIZE1 - u32x i[64]; - #endif - #endif - - u32 pw_len; - u32 alignment_placeholder_1; - u32 alignment_placeholder_2; - u32 alignment_placeholder_3; - -} pw_t; - -typedef struct -{ - u32 i; - -} bf_t; - -typedef struct -{ - u32 i[8]; - - u32 pw_len; - -} comb_t; - -typedef struct -{ - u32 b[32]; - -} bs_word_t; diff --git a/src/Makefile b/src/Makefile index 30a7ab6..c1c0e43 100644 --- a/src/Makefile +++ b/src/Makefile @@ -36,14 +36,14 @@ DOCUMENT_FOLDER ?= /opt/test/usr/share/doc/oclHashcat ## Library 
paths ## -OCL := deps/amd-app-sdk -OCLLIBPATH32 := $(OCL)/lib/x86 -OCLLIBPATH64 := $(OCL)/lib/x86_64 - CUDA := deps/cuda-7.5 CUDALIBPATH32 := deps/NVIDIA-Linux-x86_64-352.21/32 CUDALIBPATH64 := deps/NVIDIA-Linux-x86_64-352.21 +OCL := $(CUDA) +OCLLIBPATH32 := $(CUDALIBPATH32) +OCLLIBPATH64 := $(CUDALIBPATH64) + ADL := deps/adl-sdk GDK := deps/nvidia-gdk @@ -69,7 +69,6 @@ CC_WIN_64 := x86_64-w64-mingw32-gcc DLL_WIN_32 := i686-w64-mingw32-dlltool DLL_WIN_64 := x86_64-w64-mingw32-dlltool -NVCC := $(CUDA)/bin/nvcc CLCOMPILE := $(CLCOMPILE_PATH)/clcompile.bin BIN := . @@ -96,52 +95,20 @@ CFLAGS_64 := -m64 CFLAGS_LINUX := -D_POSIX -DLINUX CFLAGS_WIN := -D_WIN -DWIN -D__MSVCRT__ -D__USE_MINGW_ANSI_STDIO=1 -CFLAGS_OCL_LINUX := -D_OCL -I$(OCL)/include/ -I$(ADL)/include/ -CFLAGS_OCL_WIN := -D_OCL -I$(OCL)/include/ -I$(ADL)/include/ -CFLAGS_CUDA_LINUX := -D_CUDA -I$(CUDA)/include/ -I$(GDK)/usr/include/nvidia/gdk/ -CFLAGS_CUDA_WIN := -D_CUDA -I$(CUDA)/include/ -I$(NVAPI)/ - -LFLAGS_OCL_LINUX := -lpthread -lOpenCL -ldl -LFLAGS_OCL_WIN := -lpsapi -L./lib -LFLAGS_CUDA_LINUX := -lpthread -lnvidia-ml -lcuda -LFLAGS_CUDA_WIN := -lpsapi -L./lib - -## -## Kernels -## - -AMD_IN := amd -AMD_OUT := kernels/4098 - -NV_IN := nv -NV_OUT := kernels/4318 - -KERNELS_MODE_ALL := m00400 m00500 m01600 m01800 m02100 m02500 m03200 m05200 m05800 m06211 m06212 m06213 m06221 m06222 m06223 m06231 m06232 m06233 m06300 m06400 m06500 m06600 m06700 m06800 m07100 m07400 m07900 m08200 m08800 m08900 m09000 m09100 m09400 m09500 m09600 m10300 m10500 m10700 m10900 m11300 m11600 m11900 m12000 m12200 m12300 m12400 m12500 m12700 m12800 m00000_a0 m00000_a1 m00000_a3 m00010_a0 m00010_a1 m00010_a3 m00020_a0 m00020_a1 m00020_a3 m00030_a0 m00030_a1 m00030_a3 m00040_a0 m00040_a1 m00040_a3 m00050_a0 m00050_a1 m00050_a3 m00060_a0 m00060_a1 m00060_a3 m00100_a0 m00100_a1 m00100_a3 m00110_a0 m00110_a1 m00110_a3 m00120_a0 m00120_a1 m00120_a3 m00130_a0 m00130_a1 m00130_a3 m00140_a0 m00140_a1 m00140_a3 m00150_a0 m00150_a1 
m00150_a3 m00160_a0 m00160_a1 m00160_a3 m00190_a0 m00190_a1 m00190_a3 m00200_a0 m00200_a1 m00200_a3 m00300_a0 m00300_a1 m00300_a3 m00900_a0 m00900_a1 m00900_a3 m01000_a0 m01000_a1 m01000_a3 m01100_a0 m01100_a1 m01100_a3 m01400_a0 m01400_a1 m01400_a3 m01410_a0 m01410_a1 m01410_a3 m01420_a0 m01420_a1 m01420_a3 m01430_a0 m01430_a1 m01430_a3 m01440_a0 m01440_a1 m01440_a3 m01450_a0 m01450_a1 m01450_a3 m01460_a0 m01460_a1 m01460_a3 m01500_a0 m01500_a1 m01500_a3 m01700_a0 m01700_a1 m01700_a3 m01710_a0 m01710_a1 m01710_a3 m01720_a0 m01720_a1 m01720_a3 m01730_a0 m01730_a1 m01730_a3 m01740_a0 m01740_a1 m01740_a3 m01750_a0 m01750_a1 m01750_a3 m01760_a0 m01760_a1 m01760_a3 m02400_a0 m02400_a1 m02400_a3 m02410_a0 m02410_a1 m02410_a3 m02610_a0 m02610_a1 m02610_a3 m02710_a0 m02710_a1 m02710_a3 m02810_a0 m02810_a1 m02810_a3 m03000_a0 m03000_a1 m03000_a3 m03100_a0 m03100_a1 m03100_a3 m03710_a0 m03710_a1 m03710_a3 m03800_a0 m03800_a1 m03800_a3 m04310_a0 m04310_a1 m04310_a3 m04400_a0 m04400_a1 m04400_a3 m04500_a0 m04500_a1 m04500_a3 m04700_a0 m04700_a1 m04700_a3 m04800_a0 m04800_a1 m04800_a3 m04900_a0 m04900_a1 m04900_a3 m05000_a0 m05000_a1 m05000_a3 m05100_a0 m05100_a1 m05100_a3 m05300_a0 m05300_a1 m05300_a3 m05400_a0 m05400_a1 m05400_a3 m05500_a0 m05500_a1 m05500_a3 m05600_a0 m05600_a1 m05600_a3 m06000_a0 m06000_a1 m06000_a3 m06100_a0 m06100_a1 m06100_a3 m06900_a0 m06900_a1 m06900_a3 m07300_a0 m07300_a1 m07300_a3 m07500_a0 m07500_a1 m07500_a3 m07600_a0 m07600_a1 m07600_a3 m07700_a0 m07700_a1 m07700_a3 m07800_a0 m07800_a1 m07800_a3 m08000_a0 m08000_a1 m08000_a3 m08100_a0 m08100_a1 m08100_a3 m08300_a0 m08300_a1 m08300_a3 m08400_a0 m08400_a1 m08400_a3 m08500_a0 m08500_a1 m08500_a3 m08600_a0 m08600_a1 m08600_a3 m08700_a0 m08700_a1 m08700_a3 m09700_a0 m09700_a1 m09700_a3 m09710_a0 m09710_a1 m09710_a3 m09720_a0 m09720_a1 m09720_a3 m09800_a0 m09800_a1 m09800_a3 m09810_a0 m09810_a1 m09810_a3 m09820_a0 m09820_a1 m09820_a3 m09900_a0 m09900_a1 m09900_a3 m10100_a0 m10100_a1 m10100_a3 m10400_a0 
m10400_a1 m10400_a3 m10410_a0 m10410_a1 m10410_a3 m10420_a0 m10420_a1 m10420_a3 m10800_a0 m10800_a1 m10800_a3 m11000_a0 m11000_a1 m11000_a3 m11100_a0 m11100_a1 m11100_a3 m11200_a0 m11200_a1 m11200_a3 m11400_a0 m11400_a1 m11400_a3 m11500_a0 m11500_a1 m11500_a3 m11700_a0 m11700_a1 m11700_a3 m11800_a0 m11800_a1 m11800_a3 m12600_a0 m12600_a1 m12600_a3 -KERNELS_MASK_ALL := markov_le_v1 markov_le_v2 markov_le_v4 markov_be_v1 markov_be_v2 markov_be_v4 -KERNELS_AMP_ALL := amp_a0_v1 amp_a0_v2 amp_a0_v4 amp_a1_v1 amp_a1_v2 amp_a1_v4 amp_a3_v1 amp_a3_v2 amp_a3_v4 -AMD_DEVICES_ALL := VLIW1 VLIW4 VLIW5 -NV_DEVICES_ALL := sm_20 sm_21 sm_30 sm_35 sm_37 sm_50 sm_52 -NV_BITNESS_ALL := 32 64 - -## -## Targets for scrypt -## +CFLAGS_LINUX += -I$(OCL)/include/ -I$(ADL)/include/ -I$(GDK)/usr/include/nvidia/gdk/ +CFLAGS_WIN += -I$(OCL)/include/ -I$(ADL)/include/ -I$(NVAPI)/ -SCRYPT_N_ALL := 1024 16384 -SCRYPT_R_ALL := 1 -SCRYPT_P_ALL := 1 -SCRYPT_TMTO_ALL := 2 4 8 16 32 64 128 256 512 1024 +LFLAGS_LINUX := -lpthread -lOpenCL -ldl -lnvidia-ml +LFLAGS_WIN := -lpsapi -L./lib ## ## Targets: Global ## -all: binaries_all kernels_all +all: binaries_all binaries_all: linux32 linux64 win32 win64 rules_optimize -kernels_all: amd_all nv_all - release: binaries linux: linux32 linux64 rules_optimize @@ -150,7 +117,7 @@ windows: win32 win64 rules_optimize clean: clean_bin -clean_all: clean_bin clean_kernel clean_clcompile +clean_all: clean_bin clean_clcompile clean_bin: rm -f obj/*.o lib/*.a $(BIN)/*.bin $(BIN)/*.exe $(BIN)/*.app *.restore *.out *.pot *.dictstat *.log @@ -158,37 +125,20 @@ clean_bin: rm -rf *Hashcat.outfiles $(MAKE) -C $(RULES_OPTIMIZE_PATH) clean -clean_kernel: - rm -f $(AMD_OUT)/* $(NV_OUT)/* - clean_clcompile: rm -f ${CLCOMPILE_PATH}/clcompile.bin ${CLCOMPILE}: ${CLCOMPILE_PATH}/clcompile.c - $(CC_LINUX_64) $(CFLAGS) -o $@ -I$(CFLAGS_OCL_LINUX) $< -L$(OCLLIBPATH64) $(LFLAGS_OCL_LINUX) + $(CC_LINUX_64) $(CFLAGS) -o $@ -I$(CFLAGS_LINUX) $< -L$(OCLLIBPATH64) $(LFLAGS_LINUX) 
rules_optimize: $(MAKE) -C $(RULES_OPTIMIZE_PATH) -linux32: oclHashcat32.bin cudaHashcat32.bin -linux64: oclHashcat64.bin cudaHashcat64.bin +linux32: oclHashcat32.bin +linux64: oclHashcat64.bin -win32: oclHashcat32.exe cudaHashcat32.exe -win64: oclHashcat64.exe cudaHashcat64.exe - -amd_all: $(foreach KERNEL,$(KERNELS_MODE_ALL),$(foreach DEVICE,$(AMD_DEVICES_ALL),$(AMD_OUT)/$(KERNEL).$(DEVICE).llvmir)) \ - $(AMD_OUT)/markov_le_v1.llvmir $(AMD_OUT)/markov_le_v2.llvmir $(AMD_OUT)/markov_le_v4.llvmir \ - $(AMD_OUT)/markov_be_v1.llvmir $(AMD_OUT)/markov_be_v2.llvmir $(AMD_OUT)/markov_be_v4.llvmir \ - $(AMD_OUT)/amp_a0_v1.llvmir $(AMD_OUT)/amp_a0_v2.llvmir $(AMD_OUT)/amp_a0_v4.llvmir \ - $(AMD_OUT)/amp_a1_v1.llvmir $(AMD_OUT)/amp_a1_v2.llvmir $(AMD_OUT)/amp_a1_v4.llvmir \ - $(AMD_OUT)/amp_a3_v1.llvmir $(AMD_OUT)/amp_a3_v2.llvmir $(AMD_OUT)/amp_a3_v4.llvmir - -nv_all: $(foreach KERNEL,$(KERNELS_MODE_ALL),$(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/$(KERNEL).$(DEVICE).$(BITNESS).cubin))) \ - $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_le_v1.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_le_v2.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_le_v4.$(DEVICE).$(BITNESS).cubin)) \ - $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_be_v1.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_be_v2.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/markov_be_v4.$(DEVICE).$(BITNESS).cubin)) \ - $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a0_v1.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach 
BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a0_v2.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a0_v4.$(DEVICE).$(BITNESS).cubin)) \ - $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a1_v1.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a1_v2.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a1_v4.$(DEVICE).$(BITNESS).cubin)) \ - $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a3_v1.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a3_v2.$(DEVICE).$(BITNESS).cubin)) $(foreach DEVICE,$(NV_DEVICES_ALL),$(foreach BITNESS,$(NV_BITNESS_ALL),$(NV_OUT)/amp_a3_v4.$(DEVICE).$(BITNESS).cubin)) +win32: oclHashcat32.exe +win64: oclHashcat64.exe ## ## Targets: Linux install @@ -204,8 +154,6 @@ install: linux64 amd_all $(INSTALL) -m 755 -d $(SHARED_FOLDER) $(INSTALL) -m 755 -d $(SHARED_FOLDER)/charsets $(CP) -a charsets/* $(SHARED_FOLDER)/charsets/ - $(INSTALL) -m 755 -d $(SHARED_FOLDER)/kernels - $(CP) -a kernels/* $(SHARED_FOLDER)/kernels/ $(INSTALL) -m 755 -d $(SHARED_FOLDER)/masks $(CP) -a masks/* $(SHARED_FOLDER)/masks/ $(INSTALL) -m 755 -d $(SHARED_FOLDER)/rules @@ -219,168 +167,6 @@ uninstall: $(RM) -rf $(SHARED_FOLDER) $(RM) -rf $(DOCUMENT_FOLDER) -## -## Targets: AMD Kernel (oclHashcat) -## - -# general kernels - -$(AMD_OUT)/markov_%.llvmir: $(AMD_IN)/markov_%.cl ${CLCOMPILE} - @rm -f $(subst .llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW1 -I $(AMD_IN)/" $< $@ - -$(AMD_OUT)/amp_%_v1.llvmir: $(AMD_IN)/amp_%_v1.cl ${CLCOMPILE} - @rm -f $(subst .llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW1 -I $(AMD_IN)/" $< $@ - -$(AMD_OUT)/amp_%_v2.llvmir: $(AMD_IN)/amp_%_v2.cl ${CLCOMPILE} - @rm -f $(subst .llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW1 -I 
$(AMD_IN)/" $< $@ - -$(AMD_OUT)/amp_%_v4.llvmir: $(AMD_IN)/amp_%_v4.cl ${CLCOMPILE} - @rm -f $(subst .llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW1 -I $(AMD_IN)/" $< $@ - -$(AMD_OUT)/%.VLIW1.llvmir: $(AMD_IN)/%.cl ${CLCOMPILE} - @rm -f $(subst VLIW1.llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW1 -I $(AMD_IN)/" $< $@ - -$(AMD_OUT)/%.VLIW4.llvmir: $(AMD_IN)/%.cl ${CLCOMPILE} - @rm -f $(subst VLIW4.llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW4 -I $(AMD_IN)/" $< $@ - -$(AMD_OUT)/%.VLIW5.llvmir: $(AMD_IN)/%.cl ${CLCOMPILE} - @rm -f $(subst VLIW5.llvmir,*.kernel,$@) - $(CLCOMPILE) "-D VLIW5 -I $(AMD_IN)/" $< $@ - -# scrypt specific kernels - -.PHONY : $(AMD_OUT)/m08900.VLIW1.llvmir $(AMD_OUT)/m08900.VLIW4.llvmir $(AMD_OUT)/m08900.VLIW5.llvmir - -$(AMD_OUT)/m08900.VLIW1.llvmir: $(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(AMD_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).VLIW1.llvmir)))) ${CLCOMPILE} - -$(AMD_OUT)/m08900.VLIW4.llvmir: $(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(AMD_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).VLIW4.llvmir)))) ${CLCOMPILE} - -$(AMD_OUT)/m08900.VLIW5.llvmir: $(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(AMD_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).VLIW5.llvmir)))) ${CLCOMPILE} - -$(AMD_OUT)/m08900_%.llvmir: $(AMD_IN)/m08900.cl ${CLCOMPILE} - @rm -f $(subst .llvmir,*.kernel,$@) - $(eval splitted := $(subst ., ,$(subst _, ,$@))) - $(eval n_val := $(wordlist 2, 2, $(splitted))) - $(eval r_val := $(wordlist 3, 3, $(splitted))) - $(eval p_val := $(wordlist 4, 4, $(splitted))) - $(eval tmto_val := $(wordlist 5, 5, $(splitted))) - $(eval vliw_val := $(wordlist 6, 6, $(splitted))) - $(CLCOMPILE) "-D $(vliw_val) -I $(AMD_IN)/ -DSCRYPT_N=$(n_val) -DSCRYPT_R=$(r_val) -DSCRYPT_P=$(p_val) -DSCRYPT_TMTO=$(tmto_val)" $< $@ - -## -## Targets: NV Kernels 32 bit 
(oclHashcat) -## - -# general kernels - -$(NV_OUT)/%.sm_20.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_20 -I . -I $(NV_IN)/ $< -Dsm_20 -DNV32 - -$(NV_OUT)/%.sm_21.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_21 -I . -I $(NV_IN)/ $< -Dsm_21 -DNV32 - -$(NV_OUT)/%.sm_30.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_30 -I . -I $(NV_IN)/ $< -Dsm_30 -DNV32 - -$(NV_OUT)/%.sm_35.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_35 -I . -I $(NV_IN)/ $< -Dsm_35 -DNV32 - -$(NV_OUT)/%.sm_37.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_37 -I . -I $(NV_IN)/ $< -Dsm_37 -DNV32 - -$(NV_OUT)/%.sm_50.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_50 -I . -I $(NV_IN)/ $< -Dsm_50 -DNV32 - -$(NV_OUT)/%.sm_52.32.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_52 -I . 
-I $(NV_IN)/ $< -Dsm_52 -DNV32 - -# scrypt specific kernels - -.PHONY : $(NV_OUT)/m08900.sm_20.32.cubin $(NV_OUT)/m08900.sm_21.32.cubin $(NV_OUT)/m08900.sm_30.32.cubin $(NV_OUT)/m08900.sm_35.32.cubin $(NV_OUT)/m08900.sm_37.32.cubin $(NV_OUT)/m08900.sm_50.32.cubin $(NV_OUT)/m08900.sm_52.32.cubin - -$(NV_OUT)/m08900.sm_20.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_20.32.cubin)))) - -$(NV_OUT)/m08900.sm_21.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_21.32.cubin)))) - -$(NV_OUT)/m08900.sm_30.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_30.32.cubin)))) - -$(NV_OUT)/m08900.sm_35.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_35.32.cubin)))) - -$(NV_OUT)/m08900.sm_37.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_37.32.cubin)))) - -$(NV_OUT)/m08900.sm_50.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_50.32.cubin)))) - -$(NV_OUT)/m08900.sm_52.32.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_52.32.cubin)))) - -$(NV_OUT)/m08900_%.32.cubin: $(NV_IN)/m08900.cu - $(eval splitted := $(subst ., ,$(subst _, ,$@))) - $(eval n_val := $(wordlist 2, 2, $(splitted))) - $(eval r_val := $(wordlist 3, 3, $(splitted))) - $(eval p_val := 
$(wordlist 4, 4, $(splitted))) - $(eval tmto_val := $(wordlist 5, 5, $(splitted))) - $(eval cubin_val := $(wordlist 7, 7, $(splitted))) - ${NVCC} -ccbin $(CC_LINUX_32) -cubin --machine 32 -o $@ -arch sm_$(cubin_val) -I . -I $(NV_IN)/ $< -Dsm_$(cubin_val) -DSCRYPT_N=$(n_val) -DSCRYPT_R=$(r_val) -DSCRYPT_P=$(p_val) -DSCRYPT_TMTO=$(tmto_val) - -## -## Targets: NV Kernels 64 bit (oclHashcat) -## - -# general kernels - -$(NV_OUT)/%.sm_20.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_20 -I . -I $(NV_IN)/ $< -Dsm_20 -DNV64 - -$(NV_OUT)/%.sm_21.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_21 -I . -I $(NV_IN)/ $< -Dsm_21 -DNV64 - -$(NV_OUT)/%.sm_30.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_30 -I . -I $(NV_IN)/ $< -Dsm_30 -DNV64 - -$(NV_OUT)/%.sm_35.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_35 -I . -I $(NV_IN)/ $< -Dsm_35 -DNV64 - -$(NV_OUT)/%.sm_37.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_37 -I . -I $(NV_IN)/ $< -Dsm_37 -DNV64 - -$(NV_OUT)/%.sm_50.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_50 -I . -I $(NV_IN)/ $< -Dsm_50 -DNV64 - -$(NV_OUT)/%.sm_52.64.cubin: $(NV_IN)/%.cu - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_52 -I . 
-I $(NV_IN)/ $< -Dsm_52 -DNV64 - -# scrypt specific kernels - -.PHONY : $(NV_OUT)/m08900.sm_20.64.cubin $(NV_OUT)/m08900.sm_21.64.cubin $(NV_OUT)/m08900.sm_30.64.cubin $(NV_OUT)/m08900.sm_35.64.cubin $(NV_OUT)/m08900.sm_37.64.cubin $(NV_OUT)/m08900.sm_50.64.cubin $(NV_OUT)/m08900.sm_52.64.cubin - -$(NV_OUT)/m08900.sm_20.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_20.64.cubin)))) - -$(NV_OUT)/m08900.sm_21.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_21.64.cubin)))) - -$(NV_OUT)/m08900.sm_30.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_30.64.cubin)))) - -$(NV_OUT)/m08900.sm_35.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_35.64.cubin)))) - -$(NV_OUT)/m08900.sm_37.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_37.64.cubin)))) - -$(NV_OUT)/m08900.sm_50.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_50.64.cubin)))) - -$(NV_OUT)/m08900.sm_52.64.cubin:$(foreach N,$(SCRYPT_N_ALL),$(foreach R,$(SCRYPT_R_ALL),$(foreach P,$(SCRYPT_P_ALL),$(foreach TMTO,$(SCRYPT_TMTO_ALL),$(NV_OUT)/m08900_$(N)_$(R)_$(P)_$(TMTO).sm_52.64.cubin)))) - -$(NV_OUT)/m08900_%.64.cubin: $(NV_IN)/m08900.cu - $(eval splitted := $(subst ., ,$(subst _, ,$@))) - $(eval n_val := $(wordlist 2, 2, $(splitted))) - $(eval r_val := $(wordlist 3, 3, $(splitted))) - $(eval p_val := 
$(wordlist 4, 4, $(splitted))) - $(eval tmto_val := $(wordlist 5, 5, $(splitted))) - $(eval cubin_val := $(wordlist 7, 7, $(splitted))) - ${NVCC} -ccbin $(CC_LINUX_64) -cubin --machine 64 -o $@ -arch sm_$(cubin_val) -I . -I $(NV_IN)/ $< -Dsm_$(cubin_val) -DNV64 -DSCRYPT_N=$(n_val) -DSCRYPT_R=$(r_val) -DSCRYPT_P=$(p_val) -DSCRYPT_TMTO=$(tmto_val) - ## ## Targets: Libraries ## @@ -391,60 +177,30 @@ lib/libOpenCL.a: lib/libOpenCL64.a: ${DLL_WIN_64} -A -k -l lib/libOpenCL64.a -d lib/OpenCL64.def -lib/libcuda.a: - ${DLL_WIN_32} -A -k -l lib/libcuda.a -d lib/cuda.def - -lib/libcuda64.a: - ${DLL_WIN_64} -A -k -l lib/libcuda64.a -d lib/cuda64.def - ## ## oclHashcat ## obj/%.oclHashcat.LINUX.32.o: src/%.c - $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_OCL_LINUX) -c -o $@ $< + $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_LINUX) -c -o $@ $< obj/%.oclHashcat.LINUX.64.o: src/%.c - $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_OCL_LINUX) -c -o $@ $< + $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_LINUX) -c -o $@ $< obj/%.oclHashcat.WIN.32.o: src/%.c - $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_OCL_WIN) -c -o $@ $< + $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_WIN) -c -o $@ $< obj/%.oclHashcat.WIN.64.o: src/%.c - $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_OCL_WIN) -c -o $@ $< - -obj/%.cudaHashcat.LINUX.32.o: src/%.c - $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_CUDA_LINUX) -c -o $@ $< - -obj/%.cudaHashcat.LINUX.64.o: src/%.c - $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_CUDA_LINUX) -c -o $@ $< - -obj/%.cudaHashcat.WIN.32.o: src/%.c - $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_CUDA_WIN) -c -o $@ $< - -obj/%.cudaHashcat.WIN.64.o: src/%.c - $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_CUDA_WIN) -c -o $@ $< - -oclHashcat32.bin: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.LINUX.32.o 
obj/ext_ADL.oclHashcat.LINUX.32.o obj/shared.oclHashcat.LINUX.32.o obj/rp_gpu_on_cpu.oclHashcat.LINUX.32.o - $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_OCL_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH32) $(LFLAGS_OCL_LINUX) -DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" - -oclHashcat64.bin: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.LINUX.64.o obj/ext_ADL.oclHashcat.LINUX.64.o obj/shared.oclHashcat.LINUX.64.o obj/rp_gpu_on_cpu.oclHashcat.LINUX.64.o - $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_OCL_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH64) $(LFLAGS_OCL_LINUX) -DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" - -oclHashcat32.exe: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.WIN.32.o obj/ext_ADL.oclHashcat.WIN.32.o obj/shared.oclHashcat.WIN.32.o obj/rp_gpu_on_cpu.oclHashcat.WIN.32.o lib/libOpenCL.a /usr/i686-w64-mingw32/lib/CRT_glob.o - $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_OCL_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH32) $(LFLAGS_OCL_WIN) -static-libgcc - -oclHashcat64.exe: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.WIN.64.o obj/ext_ADL.oclHashcat.WIN.64.o obj/shared.oclHashcat.WIN.64.o obj/rp_gpu_on_cpu.oclHashcat.WIN.64.o lib/libOpenCL64.a /usr/x86_64-w64-mingw32/lib/CRT_glob.o - $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_OCL_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH64) $(LFLAGS_OCL_WIN) -static-libgcc + $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_WIN) -c -o $@ $< -cudaHashcat32.bin: src/oclHashcat.c obj/ext_cuda.cudaHashcat.LINUX.32.o obj/ext_nvml.cudaHashcat.LINUX.32.o obj/shared.cudaHashcat.LINUX.32.o obj/rp_gpu_on_cpu.cudaHashcat.LINUX.32.o - $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_CUDA_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(CUDALIBPATH32) $(LFLAGS_CUDA_LINUX) -L$(NVMLLIBPATH32) 
-DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" +oclHashcat32.bin: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.LINUX.32.o obj/ext_nvml.oclHashcat.LINUX.32.o obj/ext_ADL.oclHashcat.LINUX.32.o obj/shared.oclHashcat.LINUX.32.o obj/rp_gpu_on_cpu.oclHashcat.LINUX.32.o + $(CC_LINUX_32) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_32) $(CFLAGS_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH32) $(LFLAGS_LINUX) -L$(NVMLLIBPATH32) -DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" -cudaHashcat64.bin: src/oclHashcat.c obj/ext_cuda.cudaHashcat.LINUX.64.o obj/ext_nvml.cudaHashcat.LINUX.64.o obj/shared.cudaHashcat.LINUX.64.o obj/rp_gpu_on_cpu.cudaHashcat.LINUX.64.o - $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_CUDA_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(CUDALIBPATH64) $(LFLAGS_CUDA_LINUX) -L$(NVMLLIBPATH64) -DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" +oclHashcat64.bin: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.LINUX.64.o obj/ext_nvml.oclHashcat.LINUX.64.o obj/ext_ADL.oclHashcat.LINUX.64.o obj/shared.oclHashcat.LINUX.64.o obj/rp_gpu_on_cpu.oclHashcat.LINUX.64.o + $(CC_LINUX_64) $(CFLAGS) $(CFLAGS_LINUX) $(CFLAGS_64) $(CFLAGS_LINUX) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH64) $(LFLAGS_LINUX) -L$(NVMLLIBPATH64) -DINSTALL_FOLDER=\"$(INSTALL_FOLDER)\" -DSHARED_FOLDER=\"$(SHARED_FOLDER)\" -DDOCUMENT_FOLDER=\"$(DOCUMENT_FOLDER)\" -cudaHashcat32.exe: src/oclHashcat.c obj/ext_cuda.cudaHashcat.WIN.32.o obj/ext_nvapi.cudaHashcat.WIN.32.o obj/shared.cudaHashcat.WIN.32.o obj/rp_gpu_on_cpu.cudaHashcat.WIN.32.o lib/libcuda.a /usr/i686-w64-mingw32/lib/CRT_glob.o - $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_CUDA_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(CUDALIBPATH32) $(LFLAGS_CUDA_WIN) -static-libgcc $(NVAPI)/x86/nvapi.lib +oclHashcat32.exe: src/oclHashcat.c 
obj/ext_OpenCL.oclHashcat.WIN.32.o obj/ext_nvapi.oclHashcat.WIN.32.o obj/ext_ADL.oclHashcat.WIN.32.o obj/shared.oclHashcat.WIN.32.o obj/rp_gpu_on_cpu.oclHashcat.WIN.32.o lib/libOpenCL.a /usr/i686-w64-mingw32/lib/CRT_glob.o + $(CC_WIN_32) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_32) $(CFLAGS_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH32) $(LFLAGS_WIN) -static-libgcc $(NVAPI)/x86/nvapi.lib -cudaHashcat64.exe: src/oclHashcat.c obj/ext_cuda.cudaHashcat.WIN.64.o obj/ext_nvapi.cudaHashcat.WIN.64.o obj/shared.cudaHashcat.WIN.64.o obj/rp_gpu_on_cpu.cudaHashcat.WIN.64.o lib/libcuda64.a /usr/x86_64-w64-mingw32/lib/CRT_glob.o - $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_CUDA_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(CUDALIBPATH64) $(LFLAGS_CUDA_WIN) -static-libgcc $(NVAPI)/amd64/nvapi64.lib +oclHashcat64.exe: src/oclHashcat.c obj/ext_OpenCL.oclHashcat.WIN.64.o obj/ext_nvapi.oclHashcat.WIN.64.o obj/ext_ADL.oclHashcat.WIN.64.o obj/shared.oclHashcat.WIN.64.o obj/rp_gpu_on_cpu.oclHashcat.WIN.64.o lib/libOpenCL64.a /usr/x86_64-w64-mingw32/lib/CRT_glob.o + $(CC_WIN_64) $(CFLAGS) $(CFLAGS_WIN) $(CFLAGS_64) $(CFLAGS_WIN) -o $@ -DCOMPTIME=$(NOW) $^ -L$(OCLLIBPATH64) $(LFLAGS_WIN) -static-libgcc $(NVAPI)/amd64/nvapi64.lib diff --git a/src/ext_OpenCL.c b/src/ext_OpenCL.c index fff2b10..17f287e 100644 --- a/src/ext_OpenCL.c +++ b/src/ext_OpenCL.c @@ -165,7 +165,7 @@ cl_context hc_clCreateContext (cl_context_properties *properties, cl_uint num_de return (context); } -/* + cl_command_queue hc_clCreateCommandQueue (cl_context context, cl_device_id device, cl_command_queue_properties properties) { cl_int CL_err; @@ -181,8 +181,8 @@ cl_command_queue hc_clCreateCommandQueue (cl_context context, cl_device_id devic return (command_queue); } -*/ +/* cl_command_queue hc_clCreateCommandQueueWithProperties (cl_context context, cl_device_id device, const cl_queue_properties *properties) { cl_int CL_err; @@ -198,6 +198,7 @@ cl_command_queue hc_clCreateCommandQueueWithProperties (cl_context 
context, cl_d return (command_queue); } +*/ cl_mem hc_clCreateBuffer (cl_context context, cl_mem_flags flags, size_t size, void *host_ptr) { diff --git a/src/oclHashcat.c b/src/oclHashcat.c index f9503e7..c0f1908 100644 --- a/src/oclHashcat.c +++ b/src/oclHashcat.c @@ -6,18 +6,12 @@ #include #include #include - #include -#ifdef _CUDA -const char *PROGNAME = "cudaHashcat"; -#elif _OCL const char *PROGNAME = "oclHashcat"; -#endif - const char *VERSION_TXT = "2.01"; const uint VERSION_BIN = 201; -const uint RESTORE_MIN = 200; +const uint RESTORE_MIN = 201; #define INCR_RULES 10000 #define INCR_SALTS 100000 @@ -26,7 +20,7 @@ const uint RESTORE_MIN = 200; // comment-out for kernel source mode -#define BINARY_KERNEL +//#define BINARY_KERNEL #define USAGE 0 #define VERSION 0 @@ -77,7 +71,6 @@ const uint RESTORE_MIN = 200; #define SEPARATOR ':' #define BITMAP_MIN 16 #define BITMAP_MAX 24 -#define GPU_ASYNC 0 #define GPU_TEMP_DISABLE 0 #define GPU_TEMP_ABORT 90 #define GPU_TEMP_RETAIN 80 @@ -87,17 +80,11 @@ const uint RESTORE_MIN = 200; #define GPU_RULES 1024 #define GPU_COMBS 1024 #define GPU_BFS 1024 -#define GPU_THREADS_AMD 64 -#define GPU_THREADS_NV 256 +#define GPU_THREADS 64 #define POWERTUNE_ENABLE 0 #define LOGFILE_DISABLE 0 #define SCRYPT_TMTO 0 -#define VECT_SIZE_1 1 -#define VECT_SIZE_2 2 -#define VECT_SIZE_4 4 -#define VECT_SIZE_8 8 - #define WL_MODE_STDIN 1 #define WL_MODE_FILE 2 #define WL_MODE_MASK 3 @@ -400,7 +387,6 @@ const char *USAGE_BIG[] = " --bitmap-min=NUM Minimum number of bits allowed for bitmaps", " --bitmap-max=NUM Maximum number of bits allowed for bitmaps", " --cpu-affinity=STR Locks to CPU devices, seperate with comma", - " --gpu-async Use non-blocking async calls (NV only)", " -d, --gpu-devices=STR Devices to use, separate with comma", " -w, --workload-profile=NUM Enable a specific workload profile, see references below", " -n, --gpu-accel=NUM Workload tuning: 1, 8, 40, 80, 160", @@ -845,6 +831,7 @@ void status_display_automat () * temperature 
*/ +/* if (data.gpu_temp_disable == 0) { fprintf (out, "TEMP\t"); @@ -860,6 +847,7 @@ void status_display_automat () hc_thread_mutex_unlock (mux_adl); } +*/ #ifdef _WIN fputc ('\r', out); @@ -1499,6 +1487,7 @@ void status_display () } } +/* if (data.gpu_temp_disable == 0) { hc_thread_mutex_lock (mux_adl); @@ -1511,15 +1500,19 @@ void status_display () const int utilization = hm_get_utilization_with_device_id (i); const int fanspeed = hm_get_fanspeed_with_device_id (i); - #ifdef _OCL - log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2d%% Fan", i + 1, utilization, temperature, fanspeed); - #else - #ifdef LINUX - log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2d%% Fan", i + 1, utilization, temperature, fanspeed); - #else - log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2drpm Fan", i + 1, utilization, temperature, fanspeed); - #endif - #endif + if (vendor_id == VENDOR_ID_AMD) + { + log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2d%% Fan", i + 1, utilization, temperature, fanspeed); + } + + if (vendor_id == VENDOR_ID_NV) + { + #ifdef LINUX + log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2d%% Fan", i + 1, utilization, temperature, fanspeed); + #else + log_info ("HWMon.GPU.#%d...: %2d%% Util, %2dc Temp, %2drpm Fan", i + 1, utilization, temperature, fanspeed); + #endif + } } else { @@ -1532,6 +1525,7 @@ void status_display () hc_thread_mutex_unlock (mux_adl); } +*/ } static void status_benchmark () @@ -1607,43 +1601,21 @@ static void status_benchmark () * oclHashcat -only- functions */ -#ifdef _CUDA - static void generate_source_kernel_filename (const uint attack_exec, const uint attack_kern, const uint kern_type, char *install_dir, char *kernel_file) { if (attack_exec == ATTACK_EXEC_ON_GPU) { if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (kernel_file, 255, "%s/nv/m%05d_a0.cu", install_dir, (int) kern_type); + snprintf (kernel_file, 255, "%s/OpenCL/m%05d_a0.cl", install_dir, (int) kern_type); else if (attack_kern == ATTACK_KERN_COMBI) - 
snprintf (kernel_file, 255, "%s/nv/m%05d_a1.cu", install_dir, (int) kern_type); + snprintf (kernel_file, 255, "%s/OpenCL/m%05d_a1.cl", install_dir, (int) kern_type); else if (attack_kern == ATTACK_KERN_BF) - snprintf (kernel_file, 255, "%s/nv/m%05d_a3.cu", install_dir, (int) kern_type); + snprintf (kernel_file, 255, "%s/OpenCL/m%05d_a3.cl", install_dir, (int) kern_type); } else - snprintf (kernel_file, 255, "%s/nv/m%05d.cu", install_dir, (int) kern_type); + snprintf (kernel_file, 255, "%s/OpenCL/m%05d.cl", install_dir, (int) kern_type); } -#elif _OCL - -static void generate_source_kernel_filename (const uint attack_exec, const uint attack_kern, const uint kern_type, char *install_dir, char *kernel_file) -{ - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (kernel_file, 255, "%s/amd/m%05d_a0.cl", install_dir, (int) kern_type); - else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (kernel_file, 255, "%s/amd/m%05d_a1.cl", install_dir, (int) kern_type); - else if (attack_kern == ATTACK_KERN_BF) - snprintf (kernel_file, 255, "%s/amd/m%05d_a3.cl", install_dir, (int) kern_type); - } - else - snprintf (kernel_file, 255, "%s/amd/m%05d.cl", install_dir, (int) kern_type); -} - -#endif - - static uint convert_from_hex (char *line_buf, const uint line_len) { if (line_len & 1) return (line_len); // not in hex @@ -1751,17 +1723,7 @@ static void clear_prompt () static void gidd_to_pw_t (hc_device_param_t *device_param, const uint64_t gidd, pw_t *pw) { - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoH (pw, device_param->d_pws_buf + (gidd * sizeof (pw_t)), sizeof (pw_t)); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL hc_clEnqueueReadBuffer (device_param->command_queue, device_param->d_pws_buf, CL_TRUE, gidd * sizeof (pw_t), sizeof (pw_t), pw, 0, NULL, NULL); - - #endif } static void check_hash (hc_device_param_t *device_param, const uint salt_pos, const uint digest_pos) @@ -1791,15 
+1753,7 @@ static void check_hash (hc_device_param_t *device_param, const uint salt_pos, co plain_t plain; - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoH (&plain, device_param->d_plain_bufs + (idx * sizeof (plain_t)), sizeof (plain_t)); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL hc_clEnqueueReadBuffer (device_param->command_queue, device_param->d_plain_bufs, CL_TRUE, idx * sizeof (plain_t), sizeof (plain_t), &plain, 0, NULL, NULL); - #endif uint gidvid = plain.gidvid; uint il_pos = plain.il_pos; @@ -1813,14 +1767,14 @@ static void check_hash (hc_device_param_t *device_param, const uint salt_pos, co if (data.attack_mode == ATTACK_MODE_STRAIGHT) { - uint64_t gidd = gidvid / device_param->gpu_vector_width; - uint64_t gidm = gidvid % device_param->gpu_vector_width; + uint64_t gidd = gidvid; + uint64_t gidm = 0; pw_t pw; gidd_to_pw_t (device_param, gidd, &pw); - for (int i = 0, j = gidm; i < 16; i++, j += device_param->gpu_vector_width) + for (int i = 0, j = gidm; i < 16; i++, j++) { plain_buf[i] = pw.hi1[0][j]; } @@ -1862,14 +1816,14 @@ static void check_hash (hc_device_param_t *device_param, const uint salt_pos, co } else if (data.attack_mode == ATTACK_MODE_COMBI) { - uint64_t gidd = gidvid / device_param->gpu_vector_width; - uint64_t gidm = gidvid % device_param->gpu_vector_width; + uint64_t gidd = gidvid; + uint64_t gidm = 0; pw_t pw; gidd_to_pw_t (device_param, gidd, &pw); - for (int i = 0, j = gidm; i < 16; i++, j += device_param->gpu_vector_width) + for (int i = 0, j = gidm; i < 16; i++, j++) { plain_buf[i] = pw.hi1[0][j]; } @@ -1923,14 +1877,14 @@ static void check_hash (hc_device_param_t *device_param, const uint salt_pos, co } else if (data.attack_mode == ATTACK_MODE_HYBRID1) { - uint64_t gidd = gidvid / device_param->gpu_vector_width; - uint64_t gidm = gidvid % device_param->gpu_vector_width; + uint64_t gidd = gidvid; + uint64_t gidm = 0; pw_t pw; gidd_to_pw_t (device_param, gidd, &pw); - for (int i = 0, j 
= gidm; i < 16; i++, j += device_param->gpu_vector_width) + for (int i = 0, j = gidm; i < 16; i++, j++) { plain_buf[i] = pw.hi1[0][j]; } @@ -1957,14 +1911,14 @@ static void check_hash (hc_device_param_t *device_param, const uint salt_pos, co } else if (data.attack_mode == ATTACK_MODE_HYBRID2) { - uint64_t gidd = gidvid / device_param->gpu_vector_width; - uint64_t gidm = gidvid % device_param->gpu_vector_width; + uint64_t gidd = gidvid; + uint64_t gidm = 0; pw_t pw; gidd_to_pw_t (device_param, gidd, &pw); - for (int i = 0, j = gidm; i < 16; i++, j += device_param->gpu_vector_width) + for (int i = 0, j = gidm; i < 16; i++, j++) { plain_buf[i] = pw.hi1[0][j]; } @@ -2113,23 +2067,9 @@ static void check_cracked (hc_device_param_t *device_param, const uint salt_pos) int found = 0; - #ifdef _CUDA - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoH (device_param->result, device_param->d_result, device_param->size_results); - - hc_cuCtxPopCurrent (&device_param->context); - - for (uint i = 0; i < GPU_THREADS_NV; i++) if (device_param->result[i] == 1) found = 1; - - #elif _OCL - hc_clEnqueueReadBuffer (device_param->command_queue, device_param->d_result, CL_TRUE, 0, device_param->size_results, device_param->result, 0, NULL, NULL); - for (uint i = 0; i < GPU_THREADS_AMD; i++) if (device_param->result[i] == 1) found = 1; - - #endif + for (uint i = 0; i < GPU_THREADS; i++) if (device_param->result[i] == 1) found = 1; if (found == 1) { @@ -2137,20 +2077,8 @@ static void check_cracked (hc_device_param_t *device_param, const uint salt_pos) log_info_nn (""); - #ifdef _CUDA - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoH (&data.digests_shown_tmp[salt_buf->digests_offset], device_param->d_digests_shown + (salt_buf->digests_offset * sizeof (uint)), salt_buf->digests_cnt * sizeof (uint)); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - hc_clEnqueueReadBuffer (device_param->command_queue, device_param->d_digests_shown, CL_TRUE, 
salt_buf->digests_offset * sizeof (uint), salt_buf->digests_cnt * sizeof (uint), &data.digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL); - #endif - uint cpt_cracked = 0; for (uint digest_pos = 0; digest_pos < salt_buf->digests_cnt; digest_pos++) @@ -2204,36 +2132,12 @@ static void check_cracked (hc_device_param_t *device_param, const uint salt_pos) memset (data.digests_shown_tmp, 0, salt_buf->digests_cnt * sizeof (uint)); - #ifdef _CUDA - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemsetD8 (device_param->d_digests_shown + (salt_buf->digests_offset * sizeof (uint)), 0, salt_buf->digests_cnt * sizeof (uint)); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (uint), salt_buf->digests_cnt * sizeof (uint), &data.digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL); - - #endif } - #ifdef _CUDA - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemsetD8 (device_param->d_result, 0, device_param->size_results); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - memset (device_param->result, 0, device_param->size_results); hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_result, CL_TRUE, 0, device_param->size_results, device_param->result, 0, NULL, NULL); - - #endif } } @@ -2373,10 +2277,6 @@ static float find_gpu_blocks_div (const uint64_t total_left, const uint gpu_bloc static void run_kernel (const uint kern_run, hc_device_param_t *device_param, const uint num) { - // uint gpu_vector_width = device_param->gpu_vector_width; - - // uint num_elements = mydivc32 (num, gpu_vector_width); - uint num_elements = num; device_param->kernel_params_buf32[30] = data.combs_mode; @@ -2386,30 +2286,6 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co while (num_elements % gpu_threads) num_elements++; - #ifdef _CUDA - CUfunction function = 
NULL; - - switch (kern_run) - { - case KERN_RUN_1: function = device_param->function1; break; - case KERN_RUN_12: function = device_param->function12; break; - case KERN_RUN_2: function = device_param->function2; break; - case KERN_RUN_23: function = device_param->function23; break; - case KERN_RUN_3: function = device_param->function3; break; - } - - num_elements /= gpu_threads; - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuLaunchKernel (function, num_elements, 1, 1, gpu_threads, 1, 1, 0, device_param->stream, device_param->kernel_params, NULL); - - hc_cuStreamSynchronize (device_param->stream); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - cl_kernel kernel = NULL; switch (kern_run) @@ -2451,16 +2327,10 @@ static void run_kernel (const uint kern_run, hc_device_param_t *device_param, co hc_clFlush (device_param->command_queue); hc_clFinish (device_param->command_queue); - - #endif } static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, const uint num) { - // uint gpu_vector_width = device_param->gpu_vector_width; - - // uint num_elements = mydivc32 (num, gpu_vector_width); - uint num_elements = num; switch (kern_run) @@ -2473,43 +2343,7 @@ static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, // causes problems with special threads like in bcrypt // const uint gpu_threads = device_param->gpu_threads; - #ifdef _CUDA - - const uint gpu_threads = GPU_THREADS_NV; - - while (num_elements % gpu_threads) num_elements++; - - CUfunction function = NULL; - - switch (kern_run) - { - case KERN_RUN_MP: function = device_param->function_mp; break; - case KERN_RUN_MP_R: function = device_param->function_mp_r; break; - case KERN_RUN_MP_L: function = device_param->function_mp_l; break; - } - - void **kernel_params = NULL; - - switch (kern_run) - { - case KERN_RUN_MP: kernel_params = device_param->kernel_params_mp; break; - case KERN_RUN_MP_R: kernel_params = device_param->kernel_params_mp_r; 
break; - case KERN_RUN_MP_L: kernel_params = device_param->kernel_params_mp_l; break; - } - - num_elements /= gpu_threads; - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuLaunchKernel (function, num_elements, 1, 1, gpu_threads, 1, 1, 0, device_param->stream, kernel_params, NULL); - - hc_cuStreamSynchronize (device_param->stream); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - - const uint gpu_threads = GPU_THREADS_AMD; + const uint gpu_threads = GPU_THREADS; while (num_elements % gpu_threads) num_elements++; @@ -2556,8 +2390,6 @@ static void run_kernel_mp (const uint kern_run, hc_device_param_t *device_param, hc_clFlush (device_param->command_queue); hc_clFinish (device_param->command_queue); - - #endif } static void run_kernel_tb (hc_device_param_t *device_param, const uint num) @@ -2568,22 +2400,6 @@ static void run_kernel_tb (hc_device_param_t *device_param, const uint num) while (num_elements % gpu_threads) num_elements++; - #ifdef _CUDA - - CUfunction function = device_param->function_tb; - - void **kernel_params = device_param->kernel_params_tb; - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuLaunchKernel (function, num_elements / gpu_threads, 1, 1, gpu_threads, 1, 1, 0, device_param->stream, kernel_params, NULL); - - hc_cuStreamSynchronize (device_param->stream); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - cl_kernel kernel = device_param->kernel_tb; const size_t global_work_size[3] = { num_elements, 1, 1 }; @@ -2594,8 +2410,6 @@ static void run_kernel_tb (hc_device_param_t *device_param, const uint num) hc_clFlush (device_param->command_queue); hc_clFinish (device_param->command_queue); - - #endif } static void run_kernel_tm (hc_device_param_t *device_param) @@ -2604,22 +2418,6 @@ static void run_kernel_tm (hc_device_param_t *device_param) const uint gpu_threads = 32; - #ifdef _CUDA - - CUfunction function = device_param->function_tm; - - void **kernel_params = device_param->kernel_params_tm; - - 
hc_cuCtxPushCurrent (device_param->context); - - hc_cuLaunchKernel (function, num_elements / gpu_threads, 1, 1, gpu_threads, 1, 1, 0, device_param->stream, kernel_params, NULL); - - hc_cuStreamSynchronize (device_param->stream); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - cl_kernel kernel = device_param->kernel_tm; const size_t global_work_size[3] = { num_elements, 1, 1 }; @@ -2630,16 +2428,10 @@ static void run_kernel_tm (hc_device_param_t *device_param) hc_clFlush (device_param->command_queue); hc_clFinish (device_param->command_queue); - - #endif } static void run_kernel_amp (hc_device_param_t *device_param, const uint num) { - // uint gpu_vector_width = device_param->gpu_vector_width; - - // uint num_elements = mydivc32 (num, gpu_vector_width); - uint num_elements = num; device_param->kernel_params_amp_buf32[5] = data.combs_mode; @@ -2648,29 +2440,7 @@ static void run_kernel_amp (hc_device_param_t *device_param, const uint num) // causes problems with special threads like in bcrypt // const uint gpu_threads = device_param->gpu_threads; - #ifdef _CUDA - - const uint gpu_threads = GPU_THREADS_NV; - - while (num_elements % gpu_threads) num_elements++; - - CUfunction function = device_param->function_amp; - - void **kernel_params = device_param->kernel_params_amp; - - num_elements /= gpu_threads; - - hc_cuCtxPushCurrent (device_param->context); - - hc_cuLaunchKernel (function, num_elements, 1, 1, gpu_threads, 1, 1, 0, device_param->stream, kernel_params, NULL); - - hc_cuStreamSynchronize (device_param->stream); - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL - - const uint gpu_threads = GPU_THREADS_AMD; + const uint gpu_threads = GPU_THREADS; while (num_elements % gpu_threads) num_elements++; @@ -2687,27 +2457,21 @@ static void run_kernel_amp (hc_device_param_t *device_param, const uint num) hc_clFlush (device_param->command_queue); hc_clFinish (device_param->command_queue); - - #endif } -#ifdef _OCL static void 
run_kernel_bzero (hc_device_param_t *device_param, cl_mem buf, const uint size) { - const cl_uchar zero = 0; + // not supported with Nvidia + // hc_clEnqueueFillBuffer (device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL); - hc_clEnqueueFillBuffer (device_param->command_queue, buf, &zero, sizeof (cl_uchar), 0, size, 0, NULL, NULL); -} -#elif _CUDA -static void run_kernel_bzero (hc_device_param_t *device_param, CUdeviceptr buf, const uint size) -{ - hc_cuCtxPushCurrent (device_param->context); + char *tmp = (char *) mymalloc (size); + + memset (tmp, 0, size); - hc_cuMemsetD8 (buf, 0, size); + hc_clEnqueueWriteBuffer (device_param->command_queue, buf, CL_TRUE, 0, size, tmp, 0, NULL, NULL); - hc_cuCtxPopCurrent (&device_param->context); + free (tmp); } -#endif static int run_rule_engine (const int rule_len, const char *rule_buf) { @@ -2725,34 +2489,13 @@ static int run_rule_engine (const int rule_len, const char *rule_buf) static void run_copy (hc_device_param_t *device_param, const uint pws_cnt) { - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - #endif - - // clear some leftovers from previous run (maskfiles, etc) - - #ifdef _CUDA - if (device_param->c_bfs != 0) // should be only true in this specific case: if (data.attack_kern == ATTACK_KERN_BF) - { - hc_cuMemsetD8 (device_param->c_bfs, 0, device_param->c_bytes); - } - #endif - if (data.attack_kern == ATTACK_KERN_STRAIGHT) { - #ifdef _CUDA - hc_cuMemcpyHtoD (device_param->d_pws_buf, device_param->pws_buf, pws_cnt * sizeof (pw_t)); - #elif _OCL hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt * sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - #endif } else if (data.attack_kern == ATTACK_KERN_COMBI) { - #ifdef _CUDA - hc_cuMemcpyHtoD (device_param->d_pws_buf, device_param->pws_buf, pws_cnt * sizeof (pw_t)); - #elif _OCL hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_pws_buf, CL_TRUE, 0, pws_cnt 
* sizeof (pw_t), device_param->pws_buf, 0, NULL, NULL); - #endif } else if (data.attack_kern == ATTACK_KERN_BF) { @@ -2762,10 +2505,6 @@ static void run_copy (hc_device_param_t *device_param, const uint pws_cnt) run_kernel_mp (KERN_RUN_MP_L, device_param, pws_cnt); } - - #ifdef _CUDA - hc_cuCtxPopCurrent (&device_param->context); - #endif } static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, const uint pws_cnt) @@ -2973,11 +2712,7 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con device_param->kernel_params_mp_r_buf64[3] = off; - const uint gpu_vector_width = device_param->gpu_vector_width; - - const uint innerloop_left_d = mydivc32 (innerloop_left, gpu_vector_width); - - run_kernel_mp (KERN_RUN_MP_R, device_param, innerloop_left_d); + run_kernel_mp (KERN_RUN_MP_R, device_param, innerloop_left); } else if (data.attack_mode == ATTACK_MODE_HYBRID1) { @@ -2985,11 +2720,7 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con device_param->kernel_params_mp_buf64[3] = off; - const uint gpu_vector_width = device_param->gpu_vector_width; - - const uint innerloop_left_d = mydivc32 (innerloop_left, gpu_vector_width); - - run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left_d); + run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); } else if (data.attack_mode == ATTACK_MODE_HYBRID2) { @@ -2997,42 +2728,11 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con device_param->kernel_params_mp_buf64[3] = off; - const uint gpu_vector_width = device_param->gpu_vector_width; - - const uint innerloop_left_d = mydivc32 (innerloop_left, gpu_vector_width); - - run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left_d); + run_kernel_mp (KERN_RUN_MP, device_param, innerloop_left); } // copy amplifiers - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - if (data.attack_mode == ATTACK_MODE_STRAIGHT) - { - hc_cuMemcpyDtoD (device_param->c_rules, 
device_param->d_rules + (innerloop_pos * sizeof (gpu_rule_t)), innerloop_left * sizeof (gpu_rule_t)); - } - else if (data.attack_mode == ATTACK_MODE_COMBI) - { - hc_cuMemcpyHtoD (device_param->c_combs, device_param->combs_buf, innerloop_left * sizeof (comb_t)); - } - else if (data.attack_mode == ATTACK_MODE_BF) - { - hc_cuMemcpyDtoD (device_param->c_bfs, device_param->d_bfs, innerloop_left * sizeof (bf_t)); - } - else if (data.attack_mode == ATTACK_MODE_HYBRID1) - { - hc_cuMemcpyDtoD (device_param->c_combs, device_param->d_combs, innerloop_left * sizeof (comb_t)); - } - else if (data.attack_mode == ATTACK_MODE_HYBRID2) - { - hc_cuMemcpyDtoD (device_param->c_combs, device_param->d_combs, innerloop_left * sizeof (comb_t)); - } - - hc_cuCtxPopCurrent (&device_param->context); - - #elif _OCL if (data.attack_mode == ATTACK_MODE_STRAIGHT) { hc_clEnqueueCopyBuffer (device_param->command_queue, device_param->d_rules, device_param->d_rules_c, innerloop_pos * sizeof (gpu_rule_t), 0, innerloop_left * sizeof (gpu_rule_t), 0, NULL, NULL); @@ -3054,8 +2754,6 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con hc_clEnqueueCopyBuffer (device_param->command_queue, device_param->d_combs, device_param->d_combs_c, 0, 0, innerloop_left * sizeof (comb_t), 0, NULL, NULL); } - #endif - if (data.attack_exec == ATTACK_EXEC_ON_GPU) { if (data.attack_mode == ATTACK_MODE_BF) @@ -3064,23 +2762,11 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con { const uint size_tm = 32 * sizeof (bs_word_t); - #ifdef _CUDA - run_kernel_bzero (device_param, device_param->d_tm, size_tm); - #elif _OCL run_kernel_bzero (device_param, device_param->d_tm_c, size_tm); - #endif run_kernel_tm (device_param); - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoD (device_param->c_tm, device_param->d_tm, size_tm); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL hc_clEnqueueCopyBuffer (device_param->command_queue, 
device_param->d_tm_c, device_param->d_bfs_c, 0, 0, size_tm, 0, NULL, NULL); - #endif } } @@ -3132,28 +2818,11 @@ static void run_cracker (hc_device_param_t *device_param, const uint pw_cnt, con { run_kernel (KERN_RUN_23, device_param, pws_cnt); - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyDtoH (device_param->hooks_buf, device_param->d_hooks, device_param->size_hooks); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL hc_clEnqueueReadBuffer (device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - #endif // do something with data - - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyHtoD (device_param->d_hooks, device_param->hooks_buf, device_param->size_hooks); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL); - #endif } run_kernel (KERN_RUN_3, device_param, pws_cnt); @@ -3551,479 +3220,12 @@ static uint64_t count_words (wl_data_t *wl_data, FILE *fd, char *dictfile, dicts return (cnt); } -static uint get_gpu_vector_width (const uint hash_mode, const uint attack_mode, const uint attack_exec, const uint opti_type, const uint vliw) -{ - uint gpu_vector_width = 0; - - if ((attack_mode == ATTACK_MODE_BF) && (attack_exec == ATTACK_EXEC_ON_GPU) && (opti_type & OPTI_TYPE_SCALAR_MODE)) - { - return VECT_SIZE_1; - } - - #ifdef _CUDA - if ((attack_mode == ATTACK_MODE_STRAIGHT) && (attack_exec == ATTACK_EXEC_ON_GPU)) - { - return VECT_SIZE_1; - } - - if (vliw == 1) - { - switch (hash_mode) - { - default: gpu_vector_width = VECT_SIZE_1; break; - } - } - else if (vliw == 2) - { - switch (hash_mode) - { - case 0: gpu_vector_width = VECT_SIZE_4; break; - case 10: gpu_vector_width = VECT_SIZE_4; break; - case 11: gpu_vector_width = VECT_SIZE_4; break; - case 12: 
gpu_vector_width = VECT_SIZE_4; break; - case 20: gpu_vector_width = VECT_SIZE_4; break; - case 21: gpu_vector_width = VECT_SIZE_4; break; - case 22: gpu_vector_width = VECT_SIZE_4; break; - case 23: gpu_vector_width = VECT_SIZE_4; break; - case 30: gpu_vector_width = VECT_SIZE_4; break; - case 40: gpu_vector_width = VECT_SIZE_4; break; - case 50: gpu_vector_width = VECT_SIZE_4; break; - case 60: gpu_vector_width = VECT_SIZE_4; break; - case 100: gpu_vector_width = VECT_SIZE_4; break; - case 101: gpu_vector_width = VECT_SIZE_4; break; - case 110: gpu_vector_width = VECT_SIZE_4; break; - case 111: gpu_vector_width = VECT_SIZE_4; break; - case 112: gpu_vector_width = VECT_SIZE_4; break; - case 120: gpu_vector_width = VECT_SIZE_4; break; - case 121: gpu_vector_width = VECT_SIZE_4; break; - case 122: gpu_vector_width = VECT_SIZE_4; break; - case 124: gpu_vector_width = VECT_SIZE_4; break; - case 130: gpu_vector_width = VECT_SIZE_4; break; - case 131: gpu_vector_width = VECT_SIZE_4; break; - case 132: gpu_vector_width = VECT_SIZE_4; break; - case 133: gpu_vector_width = VECT_SIZE_4; break; - case 140: gpu_vector_width = VECT_SIZE_4; break; - case 141: gpu_vector_width = VECT_SIZE_4; break; - case 150: gpu_vector_width = VECT_SIZE_4; break; - case 160: gpu_vector_width = VECT_SIZE_4; break; - case 190: gpu_vector_width = VECT_SIZE_4; break; - case 200: gpu_vector_width = VECT_SIZE_4; break; - case 400: gpu_vector_width = VECT_SIZE_2; break; - case 500: gpu_vector_width = VECT_SIZE_2; break; - case 501: gpu_vector_width = VECT_SIZE_2; break; - case 900: gpu_vector_width = VECT_SIZE_4; break; - case 1000: gpu_vector_width = VECT_SIZE_4; break; - case 1100: gpu_vector_width = VECT_SIZE_4; break; - case 2400: gpu_vector_width = VECT_SIZE_4; break; - case 2410: gpu_vector_width = VECT_SIZE_4; break; - case 2600: gpu_vector_width = VECT_SIZE_4; break; - case 2611: gpu_vector_width = VECT_SIZE_4; break; - case 2612: gpu_vector_width = VECT_SIZE_4; break; - case 2711: 
gpu_vector_width = VECT_SIZE_4; break; - case 2811: gpu_vector_width = VECT_SIZE_4; break; - case 3710: gpu_vector_width = VECT_SIZE_4; break; - case 3800: gpu_vector_width = VECT_SIZE_4; break; - case 3711: gpu_vector_width = VECT_SIZE_4; break; - case 4300: gpu_vector_width = VECT_SIZE_4; break; - case 4800: gpu_vector_width = VECT_SIZE_4; break; - case 4900: gpu_vector_width = VECT_SIZE_4; break; - case 5100: gpu_vector_width = VECT_SIZE_4; break; - case 9900: gpu_vector_width = VECT_SIZE_4; break; - case 10200: gpu_vector_width = VECT_SIZE_4; break; - case 11000: gpu_vector_width = VECT_SIZE_4; break; - case 11500: gpu_vector_width = VECT_SIZE_4; break; - - default: gpu_vector_width = VECT_SIZE_1; break; - } - } - #endif - - #ifdef _OCL - if (vliw == 1) - { - switch (hash_mode) - { - default: gpu_vector_width = VECT_SIZE_1; break; - } - } - else if (vliw == 4) - { - switch (hash_mode) - { - case 150: gpu_vector_width = VECT_SIZE_2; break; - case 160: gpu_vector_width = VECT_SIZE_2; break; - case 300: gpu_vector_width = VECT_SIZE_2; break; - case 1400: gpu_vector_width = VECT_SIZE_2; break; - case 1410: gpu_vector_width = VECT_SIZE_2; break; - case 1420: gpu_vector_width = VECT_SIZE_2; break; - case 1421: gpu_vector_width = VECT_SIZE_2; break; - case 1430: gpu_vector_width = VECT_SIZE_2; break; - case 1440: gpu_vector_width = VECT_SIZE_2; break; - case 1441: gpu_vector_width = VECT_SIZE_2; break; - case 1450: gpu_vector_width = VECT_SIZE_1; break; - case 1460: gpu_vector_width = VECT_SIZE_2; break; - case 1500: gpu_vector_width = VECT_SIZE_1; break; - case 1700: gpu_vector_width = VECT_SIZE_1; break; - case 1710: gpu_vector_width = VECT_SIZE_1; break; - case 1711: gpu_vector_width = VECT_SIZE_1; break; - case 1720: gpu_vector_width = VECT_SIZE_1; break; - case 1722: gpu_vector_width = VECT_SIZE_1; break; - case 1730: gpu_vector_width = VECT_SIZE_1; break; - case 1731: gpu_vector_width = VECT_SIZE_1; break; - case 1740: gpu_vector_width = VECT_SIZE_1; break; - 
case 1750: gpu_vector_width = VECT_SIZE_1; break; - case 1760: gpu_vector_width = VECT_SIZE_1; break; - case 1800: gpu_vector_width = VECT_SIZE_1; break; - case 2100: gpu_vector_width = VECT_SIZE_2; break; - case 2500: gpu_vector_width = VECT_SIZE_2; break; - case 3000: gpu_vector_width = VECT_SIZE_1; break; - case 3100: gpu_vector_width = VECT_SIZE_2; break; - case 3200: gpu_vector_width = VECT_SIZE_1; break; - case 5000: gpu_vector_width = VECT_SIZE_1; break; - case 5200: gpu_vector_width = VECT_SIZE_2; break; - case 5600: gpu_vector_width = VECT_SIZE_2; break; - case 5700: gpu_vector_width = VECT_SIZE_2; break; - case 6100: gpu_vector_width = VECT_SIZE_2; break; - case 6211: - case 6212: - case 6213: - case 6221: - case 6222: - case 6223: - case 6231: - case 6232: - case 6233: - case 6241: - case 6242: - case 6243: gpu_vector_width = VECT_SIZE_1; break; - case 6400: gpu_vector_width = VECT_SIZE_1; break; - case 6500: gpu_vector_width = VECT_SIZE_1; break; - case 6600: gpu_vector_width = VECT_SIZE_1; break; - case 6700: gpu_vector_width = VECT_SIZE_2; break; - case 6800: gpu_vector_width = VECT_SIZE_1; break; - case 6900: gpu_vector_width = VECT_SIZE_1; break; - case 7100: gpu_vector_width = VECT_SIZE_1; break; - case 7200: gpu_vector_width = VECT_SIZE_1; break; - case 7300: gpu_vector_width = VECT_SIZE_1; break; - case 7400: gpu_vector_width = VECT_SIZE_1; break; - case 7500: gpu_vector_width = VECT_SIZE_1; break; - case 7700: gpu_vector_width = VECT_SIZE_1; break; - case 7800: gpu_vector_width = VECT_SIZE_1; break; - case 7900: gpu_vector_width = VECT_SIZE_1; break; - case 8000: gpu_vector_width = VECT_SIZE_2; break; - case 8200: gpu_vector_width = VECT_SIZE_1; break; - case 8500: gpu_vector_width = VECT_SIZE_2; break; - case 8700: gpu_vector_width = VECT_SIZE_2; break; - case 8800: gpu_vector_width = VECT_SIZE_1; break; - case 8900: gpu_vector_width = VECT_SIZE_1; break; - case 9000: gpu_vector_width = VECT_SIZE_1; break; - case 9100: gpu_vector_width = 
VECT_SIZE_1; break; - case 9200: gpu_vector_width = VECT_SIZE_1; break; - case 9300: gpu_vector_width = VECT_SIZE_1; break; - case 9400: gpu_vector_width = VECT_SIZE_1; break; - case 9500: gpu_vector_width = VECT_SIZE_1; break; - case 9600: gpu_vector_width = VECT_SIZE_1; break; - case 9700: gpu_vector_width = VECT_SIZE_1; break; - case 9710: gpu_vector_width = VECT_SIZE_1; break; - case 9720: gpu_vector_width = VECT_SIZE_2; break; - case 9800: gpu_vector_width = VECT_SIZE_1; break; - case 9810: gpu_vector_width = VECT_SIZE_1; break; - case 9820: gpu_vector_width = VECT_SIZE_2; break; - case 10000: gpu_vector_width = VECT_SIZE_1; break; - case 10100: gpu_vector_width = VECT_SIZE_1; break; - case 10400: gpu_vector_width = VECT_SIZE_1; break; - case 10410: gpu_vector_width = VECT_SIZE_1; break; - case 10420: gpu_vector_width = VECT_SIZE_2; break; - case 10500: gpu_vector_width = VECT_SIZE_1; break; - case 10600: gpu_vector_width = VECT_SIZE_2; break; - case 10700: gpu_vector_width = VECT_SIZE_1; break; - case 10800: gpu_vector_width = VECT_SIZE_1; break; - case 10900: gpu_vector_width = VECT_SIZE_1; break; - case 11100: gpu_vector_width = VECT_SIZE_2; break; - case 11200: gpu_vector_width = VECT_SIZE_2; break; - case 11300: gpu_vector_width = VECT_SIZE_1; break; - case 11400: gpu_vector_width = VECT_SIZE_1; break; - case 11600: gpu_vector_width = VECT_SIZE_1; break; - case 11700: gpu_vector_width = VECT_SIZE_1; break; - case 11800: gpu_vector_width = VECT_SIZE_1; break; - case 11900: gpu_vector_width = VECT_SIZE_1; break; - case 12000: gpu_vector_width = VECT_SIZE_1; break; - case 12100: gpu_vector_width = VECT_SIZE_1; break; - case 12200: gpu_vector_width = VECT_SIZE_1; break; - case 12300: gpu_vector_width = VECT_SIZE_1; break; - case 12500: gpu_vector_width = VECT_SIZE_1; break; - case 12700: gpu_vector_width = VECT_SIZE_1; break; - case 12800: gpu_vector_width = VECT_SIZE_1; break; - - default: gpu_vector_width = VECT_SIZE_4; break; - } - } - else if (vliw == 5) 
- { - switch (hash_mode) - { - case 150: gpu_vector_width = VECT_SIZE_2; break; - case 160: gpu_vector_width = VECT_SIZE_2; break; - case 300: gpu_vector_width = VECT_SIZE_2; break; - case 1400: gpu_vector_width = VECT_SIZE_2; break; - case 1410: gpu_vector_width = VECT_SIZE_2; break; - case 1420: gpu_vector_width = VECT_SIZE_2; break; - case 1421: gpu_vector_width = VECT_SIZE_2; break; - case 1430: gpu_vector_width = VECT_SIZE_2; break; - case 1440: gpu_vector_width = VECT_SIZE_2; break; - case 1441: gpu_vector_width = VECT_SIZE_2; break; - case 1450: gpu_vector_width = VECT_SIZE_1; break; - case 1460: gpu_vector_width = VECT_SIZE_2; break; - case 1500: gpu_vector_width = VECT_SIZE_1; break; - case 1700: gpu_vector_width = VECT_SIZE_1; break; - case 1710: gpu_vector_width = VECT_SIZE_1; break; - case 1711: gpu_vector_width = VECT_SIZE_1; break; - case 1720: gpu_vector_width = VECT_SIZE_1; break; - case 1722: gpu_vector_width = VECT_SIZE_1; break; - case 1730: gpu_vector_width = VECT_SIZE_1; break; - case 1731: gpu_vector_width = VECT_SIZE_1; break; - case 1740: gpu_vector_width = VECT_SIZE_1; break; - case 1750: gpu_vector_width = VECT_SIZE_1; break; - case 1760: gpu_vector_width = VECT_SIZE_1; break; - case 1800: gpu_vector_width = VECT_SIZE_1; break; - case 2100: gpu_vector_width = VECT_SIZE_2; break; - case 2500: gpu_vector_width = VECT_SIZE_2; break; - case 3000: gpu_vector_width = VECT_SIZE_1; break; - case 3100: gpu_vector_width = VECT_SIZE_2; break; - case 3200: gpu_vector_width = VECT_SIZE_1; break; - case 5000: gpu_vector_width = VECT_SIZE_1; break; - case 5200: gpu_vector_width = VECT_SIZE_2; break; - case 5400: gpu_vector_width = VECT_SIZE_2; break; - case 5600: gpu_vector_width = VECT_SIZE_2; break; - case 5700: gpu_vector_width = VECT_SIZE_2; break; - case 6100: gpu_vector_width = VECT_SIZE_2; break; - case 6211: - case 6212: - case 6213: - case 6221: - case 6222: - case 6223: - case 6231: - case 6232: - case 6233: - case 6241: - case 6242: - case 
6243: gpu_vector_width = VECT_SIZE_1; break; - case 6400: gpu_vector_width = VECT_SIZE_1; break; - case 6500: gpu_vector_width = VECT_SIZE_1; break; - case 6600: gpu_vector_width = VECT_SIZE_1; break; - case 6700: gpu_vector_width = VECT_SIZE_2; break; - case 6800: gpu_vector_width = VECT_SIZE_1; break; - case 6900: gpu_vector_width = VECT_SIZE_1; break; - case 7100: gpu_vector_width = VECT_SIZE_1; break; - case 7200: gpu_vector_width = VECT_SIZE_1; break; - case 7300: gpu_vector_width = VECT_SIZE_1; break; - case 7400: gpu_vector_width = VECT_SIZE_1; break; - case 7500: gpu_vector_width = VECT_SIZE_1; break; - case 7700: gpu_vector_width = VECT_SIZE_1; break; - case 7800: gpu_vector_width = VECT_SIZE_1; break; - case 7900: gpu_vector_width = VECT_SIZE_1; break; - case 8000: gpu_vector_width = VECT_SIZE_2; break; - case 8200: gpu_vector_width = VECT_SIZE_1; break; - case 8300: gpu_vector_width = VECT_SIZE_2; break; - case 8400: gpu_vector_width = VECT_SIZE_2; break; - case 8500: gpu_vector_width = VECT_SIZE_2; break; - case 8700: gpu_vector_width = VECT_SIZE_2; break; - case 8800: gpu_vector_width = VECT_SIZE_1; break; - case 8900: gpu_vector_width = VECT_SIZE_1; break; - case 9000: gpu_vector_width = VECT_SIZE_1; break; - case 9100: gpu_vector_width = VECT_SIZE_1; break; - case 9200: gpu_vector_width = VECT_SIZE_1; break; - case 9300: gpu_vector_width = VECT_SIZE_1; break; - case 9400: gpu_vector_width = VECT_SIZE_1; break; - case 9500: gpu_vector_width = VECT_SIZE_1; break; - case 9600: gpu_vector_width = VECT_SIZE_1; break; - case 9700: gpu_vector_width = VECT_SIZE_1; break; - case 9710: gpu_vector_width = VECT_SIZE_1; break; - case 9720: gpu_vector_width = VECT_SIZE_2; break; - case 9800: gpu_vector_width = VECT_SIZE_1; break; - case 9810: gpu_vector_width = VECT_SIZE_1; break; - case 9820: gpu_vector_width = VECT_SIZE_2; break; - case 10000: gpu_vector_width = VECT_SIZE_1; break; - case 10100: gpu_vector_width = VECT_SIZE_1; break; - case 10400: 
gpu_vector_width = VECT_SIZE_1; break; - case 10410: gpu_vector_width = VECT_SIZE_1; break; - case 10420: gpu_vector_width = VECT_SIZE_2; break; - case 10500: gpu_vector_width = VECT_SIZE_1; break; - case 10600: gpu_vector_width = VECT_SIZE_2; break; - case 10700: gpu_vector_width = VECT_SIZE_1; break; - case 10800: gpu_vector_width = VECT_SIZE_1; break; - case 10900: gpu_vector_width = VECT_SIZE_1; break; - case 11100: gpu_vector_width = VECT_SIZE_2; break; - case 11200: gpu_vector_width = VECT_SIZE_2; break; - case 11300: gpu_vector_width = VECT_SIZE_1; break; - case 11400: gpu_vector_width = VECT_SIZE_1; break; - case 11600: gpu_vector_width = VECT_SIZE_1; break; - case 11700: gpu_vector_width = VECT_SIZE_1; break; - case 11800: gpu_vector_width = VECT_SIZE_1; break; - case 11900: gpu_vector_width = VECT_SIZE_1; break; - case 12000: gpu_vector_width = VECT_SIZE_1; break; - case 12100: gpu_vector_width = VECT_SIZE_1; break; - case 12200: gpu_vector_width = VECT_SIZE_1; break; - case 12300: gpu_vector_width = VECT_SIZE_1; break; - case 12500: gpu_vector_width = VECT_SIZE_1; break; - case 12700: gpu_vector_width = VECT_SIZE_1; break; - case 12800: gpu_vector_width = VECT_SIZE_1; break; - - default: gpu_vector_width = VECT_SIZE_4; break; - } - } - #endif - - return gpu_vector_width; -} - static void pw_transpose_to_hi1 (const pw_t *p1, pw_t *p2) { memcpy (p2->hi1, p1->hi1, 64 * sizeof (uint)); } -static void pw_transpose_to_hi2 (const pw_t *p1, pw_t *p2) -{ - p2->hi2[0][ 0] = p1->hi2[0][ 0]; - p2->hi2[0][ 2] = p1->hi2[0][ 1]; - p2->hi2[0][ 4] = p1->hi2[0][ 2]; - p2->hi2[0][ 6] = p1->hi2[0][ 3]; - p2->hi2[0][ 8] = p1->hi2[0][ 4]; - p2->hi2[0][10] = p1->hi2[0][ 5]; - p2->hi2[0][12] = p1->hi2[0][ 6]; - p2->hi2[0][14] = p1->hi2[0][ 7]; - p2->hi2[0][16] = p1->hi2[0][ 8]; - p2->hi2[0][18] = p1->hi2[0][ 9]; - p2->hi2[0][20] = p1->hi2[0][10]; - p2->hi2[0][22] = p1->hi2[0][11]; - p2->hi2[0][24] = p1->hi2[0][12]; - p2->hi2[0][26] = p1->hi2[0][13]; - p2->hi2[0][28] = 
p1->hi2[0][14]; - p2->hi2[0][30] = p1->hi2[0][15]; - p2->hi2[1][ 0] = p1->hi2[0][16]; - p2->hi2[1][ 2] = p1->hi2[0][17]; - p2->hi2[1][ 4] = p1->hi2[0][18]; - p2->hi2[1][ 6] = p1->hi2[0][19]; - p2->hi2[1][ 8] = p1->hi2[0][20]; - p2->hi2[1][10] = p1->hi2[0][21]; - p2->hi2[1][12] = p1->hi2[0][22]; - p2->hi2[1][14] = p1->hi2[0][23]; - p2->hi2[1][16] = p1->hi2[0][24]; - p2->hi2[1][18] = p1->hi2[0][25]; - p2->hi2[1][20] = p1->hi2[0][26]; - p2->hi2[1][22] = p1->hi2[0][27]; - p2->hi2[1][24] = p1->hi2[0][28]; - p2->hi2[1][26] = p1->hi2[0][29]; - p2->hi2[1][28] = p1->hi2[0][30]; - p2->hi2[1][30] = p1->hi2[0][31]; - - p2->hi2[0][ 1] = p1->hi2[1][ 0]; - p2->hi2[0][ 3] = p1->hi2[1][ 1]; - p2->hi2[0][ 5] = p1->hi2[1][ 2]; - p2->hi2[0][ 7] = p1->hi2[1][ 3]; - p2->hi2[0][ 9] = p1->hi2[1][ 4]; - p2->hi2[0][11] = p1->hi2[1][ 5]; - p2->hi2[0][13] = p1->hi2[1][ 6]; - p2->hi2[0][15] = p1->hi2[1][ 7]; - p2->hi2[0][17] = p1->hi2[1][ 8]; - p2->hi2[0][19] = p1->hi2[1][ 9]; - p2->hi2[0][21] = p1->hi2[1][10]; - p2->hi2[0][23] = p1->hi2[1][11]; - p2->hi2[0][25] = p1->hi2[1][12]; - p2->hi2[0][27] = p1->hi2[1][13]; - p2->hi2[0][29] = p1->hi2[1][14]; - p2->hi2[0][31] = p1->hi2[1][15]; - p2->hi2[1][ 1] = p1->hi2[1][16]; - p2->hi2[1][ 3] = p1->hi2[1][17]; - p2->hi2[1][ 5] = p1->hi2[1][18]; - p2->hi2[1][ 7] = p1->hi2[1][19]; - p2->hi2[1][ 9] = p1->hi2[1][20]; - p2->hi2[1][11] = p1->hi2[1][21]; - p2->hi2[1][13] = p1->hi2[1][22]; - p2->hi2[1][15] = p1->hi2[1][23]; - p2->hi2[1][17] = p1->hi2[1][24]; - p2->hi2[1][19] = p1->hi2[1][25]; - p2->hi2[1][21] = p1->hi2[1][26]; - p2->hi2[1][23] = p1->hi2[1][27]; - p2->hi2[1][25] = p1->hi2[1][28]; - p2->hi2[1][27] = p1->hi2[1][29]; - p2->hi2[1][29] = p1->hi2[1][30]; - p2->hi2[1][31] = p1->hi2[1][31]; -} - -static void pw_transpose_to_hi4 (const pw_t *p1, pw_t *p2) -{ - p2->hi4[0][ 0] = p1->hi4[0][ 0]; - p2->hi4[0][ 4] = p1->hi4[0][ 1]; - p2->hi4[0][ 8] = p1->hi4[0][ 2]; - p2->hi4[0][12] = p1->hi4[0][ 3]; - p2->hi4[1][ 0] = p1->hi4[0][ 4]; - p2->hi4[1][ 4] = 
p1->hi4[0][ 5]; - p2->hi4[1][ 8] = p1->hi4[0][ 6]; - p2->hi4[1][12] = p1->hi4[0][ 7]; - p2->hi4[2][ 0] = p1->hi4[0][ 8]; - p2->hi4[2][ 4] = p1->hi4[0][ 9]; - p2->hi4[2][ 8] = p1->hi4[0][10]; - p2->hi4[2][12] = p1->hi4[0][11]; - p2->hi4[3][ 0] = p1->hi4[0][12]; - p2->hi4[3][ 4] = p1->hi4[0][13]; - p2->hi4[3][ 8] = p1->hi4[0][14]; - p2->hi4[3][12] = p1->hi4[0][15]; - - p2->hi4[0][ 1] = p1->hi4[1][ 0]; - p2->hi4[0][ 5] = p1->hi4[1][ 1]; - p2->hi4[0][ 9] = p1->hi4[1][ 2]; - p2->hi4[0][13] = p1->hi4[1][ 3]; - p2->hi4[1][ 1] = p1->hi4[1][ 4]; - p2->hi4[1][ 5] = p1->hi4[1][ 5]; - p2->hi4[1][ 9] = p1->hi4[1][ 6]; - p2->hi4[1][13] = p1->hi4[1][ 7]; - p2->hi4[2][ 1] = p1->hi4[1][ 8]; - p2->hi4[2][ 5] = p1->hi4[1][ 9]; - p2->hi4[2][ 9] = p1->hi4[1][10]; - p2->hi4[2][13] = p1->hi4[1][11]; - p2->hi4[3][ 1] = p1->hi4[1][12]; - p2->hi4[3][ 5] = p1->hi4[1][13]; - p2->hi4[3][ 9] = p1->hi4[1][14]; - p2->hi4[3][13] = p1->hi4[1][15]; - - p2->hi4[0][ 2] = p1->hi4[2][ 0]; - p2->hi4[0][ 6] = p1->hi4[2][ 1]; - p2->hi4[0][10] = p1->hi4[2][ 2]; - p2->hi4[0][14] = p1->hi4[2][ 3]; - p2->hi4[1][ 2] = p1->hi4[2][ 4]; - p2->hi4[1][ 6] = p1->hi4[2][ 5]; - p2->hi4[1][10] = p1->hi4[2][ 6]; - p2->hi4[1][14] = p1->hi4[2][ 7]; - p2->hi4[2][ 2] = p1->hi4[2][ 8]; - p2->hi4[2][ 6] = p1->hi4[2][ 9]; - p2->hi4[2][10] = p1->hi4[2][10]; - p2->hi4[2][14] = p1->hi4[2][11]; - p2->hi4[3][ 2] = p1->hi4[2][12]; - p2->hi4[3][ 6] = p1->hi4[2][13]; - p2->hi4[3][10] = p1->hi4[2][14]; - p2->hi4[3][14] = p1->hi4[2][15]; - - p2->hi4[0][ 3] = p1->hi4[3][ 0]; - p2->hi4[0][ 7] = p1->hi4[3][ 1]; - p2->hi4[0][11] = p1->hi4[3][ 2]; - p2->hi4[0][15] = p1->hi4[3][ 3]; - p2->hi4[1][ 3] = p1->hi4[3][ 4]; - p2->hi4[1][ 7] = p1->hi4[3][ 5]; - p2->hi4[1][11] = p1->hi4[3][ 6]; - p2->hi4[1][15] = p1->hi4[3][ 7]; - p2->hi4[2][ 3] = p1->hi4[3][ 8]; - p2->hi4[2][ 7] = p1->hi4[3][ 9]; - p2->hi4[2][11] = p1->hi4[3][10]; - p2->hi4[2][15] = p1->hi4[3][11]; - p2->hi4[3][ 3] = p1->hi4[3][12]; - p2->hi4[3][ 7] = p1->hi4[3][13]; - p2->hi4[3][11] 
= p1->hi4[3][14]; - p2->hi4[3][15] = p1->hi4[3][15]; -} - -static uint pw_add_to_hc1 (hc_device_param_t *device_param, const uint8_t *pw_buf, const uint pw_len) +static uint pw_add_to_hc1 (hc_device_param_t *device_param, const uint8_t *pw_buf, const uint pw_len) { if (data.devices_status == STATUS_BYPASS) return 0; @@ -4041,99 +3243,18 @@ static uint pw_add_to_hc1 (hc_device_param_t *device_param, const uint8_t *pw_bu cache_cnt++; - if (cache_cnt == VECT_SIZE_1) - { - pw_t *pw = device_param->pws_buf + pws_cnt; - - device_param->pw_transpose (&pw_cache->pw_buf, pw); - - pw->pw_len = pw_len; - - pws_cnt++; - - device_param->pws_cnt = pws_cnt; - device_param->pw_cnt = pws_cnt * 1; - - cache_cnt = 0; - } - - pw_cache->cnt = cache_cnt; - - return pws_cnt; -} - -static uint pw_add_to_hc2 (hc_device_param_t *device_param, const uint8_t *pw_buf, const uint pw_len) -{ - if (data.devices_status == STATUS_BYPASS) return 0; - - pw_cache_t *pw_cache = device_param->pw_caches + pw_len; - - uint cache_cnt = pw_cache->cnt; - - uint8_t *pw_hc2 = pw_cache->pw_buf.hc2[cache_cnt]; - - memcpy (pw_hc2, pw_buf, pw_len); - - memset (pw_hc2 + pw_len, 0, 128 - pw_len); - - uint pws_cnt = device_param->pws_cnt; - - cache_cnt++; - - if (cache_cnt == VECT_SIZE_2) - { - pw_t *pw = device_param->pws_buf + pws_cnt; - - device_param->pw_transpose (&pw_cache->pw_buf, pw); - - pw->pw_len = pw_len; - - pws_cnt++; - - device_param->pws_cnt = pws_cnt; - device_param->pw_cnt = pws_cnt * 2; - - cache_cnt = 0; - } - - pw_cache->cnt = cache_cnt; - - return pws_cnt; -} - -static uint pw_add_to_hc4 (hc_device_param_t *device_param, const uint8_t *pw_buf, const uint pw_len) -{ - if (data.devices_status == STATUS_BYPASS) return 0; - - pw_cache_t *pw_cache = device_param->pw_caches + pw_len; - - uint cache_cnt = pw_cache->cnt; - - uint8_t *pw_hc4 = pw_cache->pw_buf.hc4[cache_cnt]; - - memcpy (pw_hc4, pw_buf, pw_len); - - memset (pw_hc4 + pw_len, 0, 64 - pw_len); - - uint pws_cnt = device_param->pws_cnt; - - 
cache_cnt++; - - if (cache_cnt == VECT_SIZE_4) - { - pw_t *pw = device_param->pws_buf + pws_cnt; + pw_t *pw = device_param->pws_buf + pws_cnt; - device_param->pw_transpose (&pw_cache->pw_buf, pw); + device_param->pw_transpose (&pw_cache->pw_buf, pw); - pw->pw_len = pw_len; + pw->pw_len = pw_len; - pws_cnt++; + pws_cnt++; - device_param->pws_cnt = pws_cnt; - device_param->pw_cnt = pws_cnt * 4; + device_param->pws_cnt = pws_cnt; + device_param->pw_cnt = pws_cnt * 1; - cache_cnt = 0; - } + cache_cnt = 0; pw_cache->cnt = cache_cnt; @@ -4152,9 +3273,6 @@ static void *thread_monitor (void *p) uint remove_left = data.remove_timer; uint status_left = data.status_timer; - #ifdef _OCL - #ifndef OSX - int *fan_speed_chgd = (int *) mycalloc (data.devices_cnt, sizeof (int)); // temperature controller "loopback" values @@ -4169,9 +3287,6 @@ static void *thread_monitor (void *p) time_t last_temp_check_time; - #endif - #endif - uint sleep_time = 1; if (data.runtime) @@ -4188,11 +3303,7 @@ static void *thread_monitor (void *p) if (data.gpu_temp_disable == 0) { - #ifdef _OCL - #ifndef OSX time (&last_temp_check_time); - #endif - #endif hwmon_check = 1; } @@ -4208,13 +3319,11 @@ static void *thread_monitor (void *p) if (data.devices_status != STATUS_RUNNING) continue; + /* if (hwmon_check == 1) { hc_thread_mutex_lock (mux_adl); - #ifdef _OCL - #ifndef OSX - time_t temp_check_time; time (&temp_check_time); @@ -4223,9 +3332,6 @@ static void *thread_monitor (void *p) if (Ta == 0) Ta = 1; - #endif - #endif - for (uint i = 0; i < data.devices_cnt; i++) { const int temperature = hm_get_temperature_with_device_id (i); @@ -4239,9 +3345,6 @@ static void *thread_monitor (void *p) break; } - #ifdef _OCL - #ifndef OSX - const int gpu_temp_retain = data.gpu_temp_retain; if (gpu_temp_retain) @@ -4296,13 +3399,11 @@ static void *thread_monitor (void *p) } } } - - #endif - #endif } hc_thread_mutex_unlock (mux_adl); } + */ if (restore_check == 1) { @@ -4375,14 +3476,10 @@ static void *thread_monitor 
(void *p) } } - #ifdef _OCL - #ifndef OSX myfree (fan_speed_chgd); myfree (temp_diff_old); myfree (temp_diff_sum); - #endif - #endif p = NULL; @@ -4726,7 +3823,7 @@ static uint get_work (hc_device_param_t *device_param, const uint64_t max) if (device_param->gpu_blocks == device_param->gpu_blocks_user) { const uint32_t gpu_blocks_new = (float) device_param->gpu_blocks * data.gpu_blocks_div; - const uint32_t gpu_power_new = gpu_blocks_new / device_param->gpu_vector_width; + const uint32_t gpu_power_new = gpu_blocks_new; if (gpu_blocks_new < device_param->gpu_blocks) { @@ -4981,10 +4078,8 @@ static void *thread_calc (void *p) const uint64_t words_off = device_param->words_off; const uint64_t words_fin = words_off + work; - const uint gpu_vector_width = device_param->gpu_vector_width; - const uint pw_cnt = work; - const uint pws_cnt = mydivc32 (work, gpu_vector_width); + const uint pws_cnt = work; device_param->pw_cnt = pw_cnt; device_param->pws_cnt = pws_cnt; @@ -5334,10 +4429,6 @@ static void *thread_calc (void *p) static void weak_hash_check (hc_device_param_t *device_param, const uint salt_pos, const uint gpu_loops) { - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - #endif - salt_t *salt_buf = &data.salts_buf[salt_pos]; device_param->kernel_params_buf32[24] = salt_pos; @@ -5408,10 +4499,6 @@ static void weak_hash_check (hc_device_param_t *device_param, const uint salt_po data.dictfile = dictfile_old; data.dictfile2 = dictfile2_old; data.mask = mask_old; - - #ifdef _CUDA - hc_cuCtxPopCurrent (&device_param->context); - #endif } // hlfmt hashcat @@ -5747,12 +4834,11 @@ static uint hlfmt_detect (FILE *fp, uint max_check) * main */ -#ifdef _OCL +// temp ? 
void *__stdcall ADL_Main_Memory_Alloc (const int iSize) { return mymalloc (iSize); } -#endif static uint generate_bitmaps (const uint digests_cnt, const uint dgst_size, const uint dgst_shifts, char *digests_buf_ptr, const uint bitmap_mask, const uint bitmap_size, uint *bitmap_a, uint *bitmap_b, uint *bitmap_c, uint *bitmap_d, const uint64_t collisions_max) { @@ -5906,7 +4992,6 @@ int main (int argc, char **argv) uint increment_min = INCREMENT_MIN; uint increment_max = INCREMENT_MAX; char *cpu_affinity = NULL; - uint gpu_async = GPU_ASYNC; char *gpu_devices = NULL; char *truecrypt_keyfiles = NULL; uint workload_profile = WORKLOAD_PROFILE; @@ -5980,7 +5065,6 @@ int main (int argc, char **argv) #define IDX_MARKOV_THRESHOLD 't' #define IDX_MARKOV_HCSTAT 0xff24 #define IDX_CPU_AFFINITY 0xff25 - #define IDX_GPU_ASYNC 0xff26 #define IDX_GPU_DEVICES 'd' #define IDX_WORKLOAD_PROFILE 'w' #define IDX_GPU_ACCEL 'n' @@ -6060,7 +5144,6 @@ int main (int argc, char **argv) {"markov-threshold", required_argument, 0, IDX_MARKOV_THRESHOLD}, {"markov-hcstat", required_argument, 0, IDX_MARKOV_HCSTAT}, {"cpu-affinity", required_argument, 0, IDX_CPU_AFFINITY}, - {"gpu-async", no_argument, 0, IDX_GPU_ASYNC}, {"gpu-devices", required_argument, 0, IDX_GPU_DEVICES}, {"workload-profile", required_argument, 0, IDX_WORKLOAD_PROFILE}, {"gpu-accel", required_argument, 0, IDX_GPU_ACCEL}, @@ -6216,11 +5299,8 @@ int main (int argc, char **argv) uint remove_timer_chgd = 0; uint increment_min_chgd = 0; uint increment_max_chgd = 0; - - #if _OCL uint gpu_temp_abort_chgd = 0; uint gpu_temp_retain_chgd = 0; - #endif optind = 1; optopt = 0; @@ -6292,7 +5372,6 @@ int main (int argc, char **argv) case IDX_HEX_SALT: hex_salt = 1; break; case IDX_HEX_WORDLIST: hex_wordlist = 1; break; case IDX_CPU_AFFINITY: cpu_affinity = optarg; break; - case IDX_GPU_ASYNC: gpu_async = 1; break; case IDX_GPU_DEVICES: gpu_devices = optarg; break; case IDX_WORKLOAD_PROFILE: workload_profile = atoi (optarg); break; case 
IDX_GPU_ACCEL: gpu_accel = atoi (optarg); @@ -6300,15 +5379,9 @@ int main (int argc, char **argv) case IDX_GPU_LOOPS: gpu_loops = atoi (optarg); gpu_loops_chgd = 1; break; case IDX_GPU_TEMP_DISABLE: gpu_temp_disable = 1; break; - case IDX_GPU_TEMP_ABORT: - #if _OCL - gpu_temp_abort_chgd = 1; - #endif + case IDX_GPU_TEMP_ABORT: gpu_temp_abort_chgd = 1; gpu_temp_abort = atoi (optarg); break; - case IDX_GPU_TEMP_RETAIN: - #if _OCL - gpu_temp_retain_chgd = 1; - #endif + case IDX_GPU_TEMP_RETAIN: gpu_temp_retain_chgd = 1; gpu_temp_retain = atoi (optarg); break; case IDX_POWERTUNE_ENABLE: powertune_enable = 1; break; case IDX_LOGFILE_DISABLE: logfile_disable = 1; break; @@ -7126,7 +6199,6 @@ int main (int argc, char **argv) logfile_top_uint (debug_mode); logfile_top_uint (force); logfile_top_uint (gpu_accel); - logfile_top_uint (gpu_async); logfile_top_uint (gpu_loops); logfile_top_uint (gpu_temp_abort); logfile_top_uint (gpu_temp_disable); @@ -7315,7 +6387,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -7340,7 +6411,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -7365,7 +6435,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -7390,7 +6459,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -7508,7 +6576,6 @@ int main (int argc, char **argv) opti_type = 
OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -7593,7 +6660,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -7617,7 +6683,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -7641,7 +6706,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7665,7 +6729,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7690,7 +6753,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7809,7 +6871,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7836,7 +6897,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7862,7 +6922,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | 
OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7887,7 +6946,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -7995,7 +7053,6 @@ int main (int argc, char **argv) sort_by_digest = sort_by_digest_4_5; opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED; @@ -8013,8 +7070,7 @@ int main (int argc, char **argv) dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 parse_func = mysql323_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 - opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE; + opti_type = OPTI_TYPE_ZERO_BYTE; dgst_pos0 = 0; dgst_pos1 = 1; dgst_pos2 = 2; @@ -8034,7 +7090,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED; @@ -8103,7 +7158,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -8129,7 +7183,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_MEET_IN_MIDDLE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED @@ -8158,7 +7211,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED; dgst_pos0 = 0; @@ -8180,7 +7232,6 @@ 
int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -8204,7 +7255,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8275,7 +7325,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8380,7 +7429,6 @@ int main (int argc, char **argv) parse_func = descrypt_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_PRECOMPUTE_PERMUT; dgst_pos0 = 0; dgst_pos1 = 1; @@ -8416,7 +7464,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -8440,7 +7487,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8464,7 +7510,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8536,7 +7581,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8562,7 +7606,6 @@ int main 
(int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -8675,7 +7718,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED; @@ -8696,7 +7738,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED; dgst_pos0 = 0; @@ -8834,7 +7875,6 @@ int main (int argc, char **argv) parse_func = lm_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_PRECOMPUTE_PERMUT; dgst_pos0 = 0; dgst_pos1 = 1; @@ -8852,8 +7892,7 @@ int main (int argc, char **argv) dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 parse_func = oracleh_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 - opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE; + opti_type = OPTI_TYPE_ZERO_BYTE; dgst_pos0 = 0; dgst_pos1 = 1; dgst_pos2 = 2; @@ -9161,7 +8200,6 @@ int main (int argc, char **argv) parse_func = netntlmv1_parse_hash; sort_by_digest = sort_by_digest_4_4; opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_PRECOMPUTE_PERMUT; dgst_pos0 = 0; dgst_pos1 = 1; @@ -9200,7 +8238,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -9710,7 +8747,6 @@ int main (int argc, char **argv) sort_by_digest = sort_by_digest_4_8; opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT - | OPTI_TYPE_SCALAR_MODE | 
OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_RAW_HASH; @@ -9802,7 +8838,6 @@ int main (int argc, char **argv) parse_func = racf_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_PRECOMPUTE_PERMUT; dgst_pos0 = 0; dgst_pos1 = 1; @@ -9818,8 +8853,7 @@ int main (int argc, char **argv) dgst_size = DGST_SIZE_4_4; parse_func = lotus5_parse_hash; sort_by_digest = sort_by_digest_4_4; - opti_type = OPTI_TYPE_SCALAR_MODE - | OPTI_TYPE_EARLY_SKIP + opti_type = OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED | OPTI_TYPE_RAW_HASH; @@ -9837,8 +8871,7 @@ int main (int argc, char **argv) dgst_size = DGST_SIZE_4_4; parse_func = lotus6_parse_hash; sort_by_digest = sort_by_digest_4_4; - opti_type = OPTI_TYPE_SCALAR_MODE - | OPTI_TYPE_EARLY_SKIP + opti_type = OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_RAW_HASH; dgst_pos0 = 0; @@ -10106,7 +9139,6 @@ int main (int argc, char **argv) sort_by_digest = sort_by_digest_4_4; opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED; @@ -10140,7 +9172,6 @@ int main (int argc, char **argv) parse_func = siphash_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_RAW_HASH; dgst_pos0 = 0; @@ -10260,7 +9291,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_APPENDED_SALT @@ -10301,7 +9331,6 @@ int main (int argc, char **argv) opti_type = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT | OPTI_TYPE_PRECOMPUTE_MERKLE - | OPTI_TYPE_SCALAR_MODE | OPTI_TYPE_EARLY_SKIP | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_NOT_SALTED @@ -10429,8 
+9458,7 @@ int main (int argc, char **argv) dgst_size = DGST_SIZE_4_4; // originally DGST_SIZE_4_2 parse_func = crc32_parse_hash; sort_by_digest = sort_by_digest_4_4; // originally sort_by_digest_4_2 - opti_type = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SCALAR_MODE; + opti_type = OPTI_TYPE_ZERO_BYTE; dgst_pos0 = 0; dgst_pos1 = 1; dgst_pos2 = 2; @@ -13136,71 +12164,61 @@ int main (int argc, char **argv) * platform */ - #ifdef _CUDA - if (cuInit (0) != CUDA_SUCCESS) - { - log_error ("ERROR: No NVidia compatible platform found"); - - return (-1); - } - #endif - - /** - * devices get - */ - - uint devices_all_cnt = 0; + cl_platform_id CL_platforms[CL_PLATFORMS_MAX]; - #ifdef _CUDA - CUdevice devices_all[DEVICES_MAX]; - CUdevice devices[DEVICES_MAX]; + uint CL_platforms_cnt = 0; - hc_cuDeviceGetCount ((int *) &devices_all_cnt); + hc_clGetPlatformIDs (CL_PLATFORMS_MAX, CL_platforms, &CL_platforms_cnt); - for (uint i = 0; i < devices_all_cnt; i++) + if (CL_platforms_cnt == 0) { - hc_cuDeviceGet (&devices_all[i], i); - } - - #elif _OCL - cl_platform_id CL_platform = NULL; + log_error ("ERROR: No OpenCL compatible platform found"); - cl_platform_id CL_platforms[CL_PLATFORMS_MAX]; + return (-1); + } - uint CL_platforms_cnt = 0; + if (CL_platforms_cnt > 1) + { + log_error ("ERROR: Too many OpenCL compatible platforms found"); - hc_clGetPlatformIDs (CL_PLATFORMS_MAX, CL_platforms, &CL_platforms_cnt); + return (-1); + } - for (uint i = 0; i < CL_platforms_cnt; i++) - { - char CL_platform_vendor[INFOSZ]; + cl_platform_id CL_platform = CL_platforms[0]; - memset (CL_platform_vendor, 0, sizeof (CL_platform_vendor)); + char CL_platform_vendor[INFOSZ]; - hc_clGetPlatformInfo (CL_platforms[i], CL_PLATFORM_VENDOR, sizeof (CL_platform_vendor), CL_platform_vendor, NULL); + memset (CL_platform_vendor, 0, sizeof (CL_platform_vendor)); - if ((strcmp (CL_platform_vendor, CL_VENDOR_AMD) != 0) - && (strcmp (CL_platform_vendor, CL_VENDOR_SDS) != 0) - && (strcmp (CL_platform_vendor, CL_VENDOR_APPLE) 
!= 0)) continue; + hc_clGetPlatformInfo (CL_platform, CL_PLATFORM_VENDOR, sizeof (CL_platform_vendor), CL_platform_vendor, NULL); - if (strcmp (CL_platform_vendor, CL_VENDOR_SDS) == 0) gpu_temp_disable = 1; + uint vendor_id; - CL_platform = CL_platforms[i]; + if (strcmp (CL_platform_vendor, CL_VENDOR_AMD) == 0) + { + vendor_id = VENDOR_ID_AMD; } - - if (CL_platform == NULL) + else if (strcmp (CL_platform_vendor, CL_VENDOR_NV) == 0) { - log_error ("ERROR: No AMD/SDS compatible platform found"); - - return (-1); + vendor_id = VENDOR_ID_NV; + } + else + { + vendor_id = VENDOR_ID_UNKNOWN; } + data.vendor_id = vendor_id; + + /** + * devices + */ + cl_device_id devices_all[DEVICES_MAX]; cl_device_id devices[DEVICES_MAX]; - hc_clGetDeviceIDs (CL_platform, CL_DEVICE_TYPE_GPU, DEVICES_MAX, devices_all, (uint *) &devices_all_cnt); + uint devices_all_cnt = 0; - #endif + hc_clGetDeviceIDs (CL_platform, CL_DEVICE_TYPE_GPU, DEVICES_MAX, devices_all, (uint *) &devices_all_cnt); int hm_adapters_all = devices_all_cnt; @@ -13208,107 +12226,108 @@ int main (int argc, char **argv) memset (hm_adapter_all, 0, sizeof (hm_adapter_all)); + /* if (gpu_temp_disable == 0) { - #ifdef _CUDA - #ifdef LINUX - if (hc_NVML_nvmlInit () == NVML_SUCCESS) + if (vendor_id == VENDOR_ID_NV) { - HM_ADAPTER nvGPUHandle[DEVICES_MAX]; + #ifdef LINUX + if (hc_NVML_nvmlInit () == NVML_SUCCESS) + { + HM_ADAPTER nvGPUHandle[DEVICES_MAX]; - int tmp_in = hm_get_adapter_index (nvGPUHandle); + int tmp_in = hm_get_adapter_index (nvGPUHandle); - int tmp_out = 0; + int tmp_out = 0; - for (int i = 0; i < tmp_in; i++) - { - hm_adapter_all[tmp_out++].adapter_index = nvGPUHandle[i]; - } + for (int i = 0; i < tmp_in; i++) + { + hm_adapter_all[tmp_out++].adapter_index = nvGPUHandle[i]; + } - hm_adapters_all = tmp_out; + hm_adapters_all = tmp_out; - for (int i = 0; i < tmp_out; i++) - { - unsigned int speed; + for (int i = 0; i < tmp_out; i++) + { + unsigned int speed; - if (nvmlDeviceGetFanSpeed 
(hm_adapter_all[i].adapter_index, &speed) != NVML_ERROR_NOT_SUPPORTED) hm_adapter_all[i].fan_supported = 1; + if (nvmlDeviceGetFanSpeed (hm_adapter_all[i].adapter_index, &speed) != NVML_ERROR_NOT_SUPPORTED) hm_adapter_all[i].fan_supported = 1; + } } - } - #endif + #endif - #ifdef WIN - if (NvAPI_Initialize () == NVAPI_OK) - { - HM_ADAPTER nvGPUHandle[DEVICES_MAX]; + #ifdef WIN + if (NvAPI_Initialize () == NVAPI_OK) + { + HM_ADAPTER nvGPUHandle[DEVICES_MAX]; - int tmp_in = hm_get_adapter_index (nvGPUHandle); + int tmp_in = hm_get_adapter_index (nvGPUHandle); - int tmp_out = 0; + int tmp_out = 0; - for (int i = 0; i < tmp_in; i++) - { - hm_adapter_all[tmp_out++].adapter_index = nvGPUHandle[i]; - } + for (int i = 0; i < tmp_in; i++) + { + hm_adapter_all[tmp_out++].adapter_index = nvGPUHandle[i]; + } - hm_adapters_all = tmp_out; + hm_adapters_all = tmp_out; - for (int i = 0; i < tmp_out; i++) - { - NvU32 speed; + for (int i = 0; i < tmp_out; i++) + { + NvU32 speed; - if (NvAPI_GPU_GetTachReading (hm_adapter_all[i].adapter_index, &speed) != NVAPI_NOT_SUPPORTED) hm_adapter_all[i].fan_supported = 1; + if (NvAPI_GPU_GetTachReading (hm_adapter_all[i].adapter_index, &speed) != NVAPI_NOT_SUPPORTED) hm_adapter_all[i].fan_supported = 1; + } } + #endif } - #endif - #endif - #ifdef _OCL - #ifndef OSX - HM_LIB hm_dll = hm_init (); + if (vendor_id == VENDOR_ID_AMD) + { + HM_LIB hm_dll = hm_init (); - data.hm_dll = hm_dll; + data.hm_dll = hm_dll; - if (hc_ADL_Main_Control_Create (hm_dll, ADL_Main_Memory_Alloc, 0) == ADL_OK) - { - // total number of adapters + if (hc_ADL_Main_Control_Create (hm_dll, ADL_Main_Memory_Alloc, 0) == ADL_OK) + { + // total number of adapters - int hm_adapters_num; + int hm_adapters_num; - if (get_adapters_num (hm_dll, &hm_adapters_num) != 0) return (-1); + if (get_adapters_num (hm_dll, &hm_adapters_num) != 0) return (-1); - // adapter info + // adapter info - LPAdapterInfo lpAdapterInfo = hm_get_adapter_info (hm_dll, hm_adapters_num); + LPAdapterInfo 
lpAdapterInfo = hm_get_adapter_info (hm_dll, hm_adapters_num); - if (lpAdapterInfo == NULL) return (-1); + if (lpAdapterInfo == NULL) return (-1); - // get a list (of ids of) valid/usable adapters + // get a list (of ids of) valid/usable adapters - int num_adl_adapters = 0; + int num_adl_adapters = 0; - uint32_t *valid_adl_device_list = hm_get_list_valid_adl_adapters (hm_adapters_num, &num_adl_adapters, lpAdapterInfo); + uint32_t *valid_adl_device_list = hm_get_list_valid_adl_adapters (hm_adapters_num, &num_adl_adapters, lpAdapterInfo); - if (num_adl_adapters > 0) - { - hc_thread_mutex_lock (mux_adl); + if (num_adl_adapters > 0) + { + hc_thread_mutex_lock (mux_adl); - // hm_get_opencl_busid_devid (hm_adapter_all, devices_all_cnt, devices_all); + // hm_get_opencl_busid_devid (hm_adapter_all, devices_all_cnt, devices_all); - hm_get_adapter_index (hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); + hm_get_adapter_index (hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); - hm_get_overdrive_version (hm_dll, hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); - hm_check_fanspeed_control (hm_dll, hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); + hm_get_overdrive_version (hm_dll, hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); + hm_check_fanspeed_control (hm_dll, hm_adapter_all, valid_adl_device_list, num_adl_adapters, lpAdapterInfo); - hc_thread_mutex_unlock (mux_adl); - } + hc_thread_mutex_unlock (mux_adl); + } - hm_adapters_all = num_adl_adapters; + hm_adapters_all = num_adl_adapters; - myfree (valid_adl_device_list); - myfree (lpAdapterInfo); + myfree (valid_adl_device_list); + myfree (lpAdapterInfo); + } } - #endif - #endif } if (hm_adapters_all == 0) @@ -13321,6 +12340,7 @@ int main (int argc, char **argv) gpu_temp_abort = 0; gpu_temp_retain = 0; } + */ /** * enable custom signal handler(s) @@ -13365,19 +12385,6 @@ int main (int argc, char **argv) 
memset (device_name, 0, sizeof (device_name)); - #ifdef _CUDA - size_t global_mem_size; - int max_clock_frequency; - int max_compute_units; - int kernel_exec_timeout; - - hc_cuDeviceGetName (device_name, sizeof (device_name), devices[device_id]); - hc_cuDeviceTotalMem (&global_mem_size, devices[device_id]); - hc_cuDeviceGetAttribute (&max_clock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, devices[device_id]); max_clock_frequency /= 1000; - hc_cuDeviceGetAttribute (&max_compute_units, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, devices[device_id]); - hc_cuDeviceGetAttribute (&kernel_exec_timeout, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, devices[device_id]); - - #elif _OCL cl_ulong global_mem_size; cl_uint max_clock_frequency; cl_uint max_compute_units; @@ -13387,8 +12394,6 @@ int main (int argc, char **argv) hc_clGetDeviceInfo (devices[device_id], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (max_clock_frequency), &max_clock_frequency, NULL); hc_clGetDeviceInfo (devices[device_id], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (max_compute_units), &max_compute_units, NULL); - #endif - if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0)) { log_info ("Device #%u: %s, %luMB, %dMhz, %uMCU", @@ -13399,16 +12404,6 @@ int main (int argc, char **argv) (unsigned int) max_compute_units); } - #ifdef _CUDA - if (quiet == 0 && kernel_exec_timeout != 0 && algorithm_pos == 0) - { - log_info ("Device #%u: WARNING! 
Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1); - #if _WIN - log_info (" You can disable it with a regpatch, see here: http://hashcat.net/wiki/doku.php?id=timeout_patch"); - #endif - } - #endif - devices_cnt++; } @@ -13419,6 +12414,8 @@ int main (int argc, char **argv) return (-1); } + data.devices_cnt = devices_cnt; + if ((benchmark == 1 || quiet == 0) && (algorithm_pos == 0)) { log_info (""); @@ -13440,6 +12437,10 @@ int main (int argc, char **argv) } } + data.gpu_temp_disable = gpu_temp_disable; + data.gpu_temp_abort = gpu_temp_abort; + data.gpu_temp_retain = gpu_temp_retain; + if (data.quiet == 0) { log_info ("Hashes: %u hashes; %u unique digests, %u unique salts", hashes_cnt_orig, digests_cnt, salts_cnt); @@ -13486,43 +12487,15 @@ int main (int argc, char **argv) } } - /** - * store all the preparation, not hash_mode dependant - */ - - data.gpu_temp_disable = gpu_temp_disable; - data.gpu_temp_abort = gpu_temp_abort; - data.gpu_temp_retain = gpu_temp_retain; - - data.devices_cnt = devices_cnt; - - #ifdef _OCL - /** - * catalyst driver check - */ - - int catalyst_check = (force == 1) ? 
0 : 1; - - int catalyst_warn = 0; - - int catalyst_broken = 0; - #endif - /** * devices init */ - #ifdef _OCL - #ifndef OSX - int gpu_temp_retain_set = 0; - int *temp_retain_fanspeed_value = (int *) mycalloc (devices_cnt, sizeof (int)); ADLOD6MemClockState *od_clock_mem_status = (ADLOD6MemClockState *) mycalloc (devices_cnt, sizeof (ADLOD6MemClockState)); int *od_power_control_status = (int *) mycalloc (devices_cnt, sizeof (int)); - #endif - #endif hc_device_param_t *devices_param = (hc_device_param_t *) mycalloc (devices_cnt, sizeof (hc_device_param_t)); @@ -13532,56 +12505,6 @@ int main (int argc, char **argv) { hc_device_param_t *device_param = &data.devices_param[device_id]; - #ifdef _CUDA - CUdevice device = devices[device_id]; - - device_param->device = device; - - size_t bytes; - - hc_cuDeviceTotalMem (&bytes, device); - - device_param->gpu_maxmem_alloc = bytes; - - int sm_major = 0; - int sm_minor = 0; - int max_compute_units = 0; - - hc_cuDeviceComputeCapability (&sm_major, &sm_minor, device); - - if (sm_major == 1) - { - log_error ("ERROR: Shader Model 1.0 - 1.3 based GPU detected. 
Support for CUDA was dropped by NVidia."); - log_error (" Remove it from your system or use -d and select only supported cards."); - - return (-1); - } - - device_param->sm_major = sm_major; - device_param->sm_minor = sm_minor; - - hc_cuDeviceGetAttribute (&max_compute_units, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device); - - if (sm_major >= 5) - { - // those maxwell and newer are so good compared to older chipsets we need to equalize - // their power to older chipsets, otherwise workload distribution which is based on the compute_units - // gets out of control - - max_compute_units *= 3; - } - - device_param->gpu_processors = max_compute_units; - - /** - * calculate vector size - */ - - uint vliw = get_vliw_by_compute_capability (sm_major, sm_minor); - - device_param->gpu_vector_width = get_gpu_vector_width (hash_mode, attack_kern, attack_exec, opti_type, vliw); - - #elif _OCL cl_device_id device = devices[device_id]; device_param->device = device; @@ -13622,890 +12545,172 @@ int main (int argc, char **argv) device_param->driver_version = mystrdup (tmp); - /** - * calculate vector size - */ - - uint vliw = get_vliw_by_device_name (device_param->device_name); - - device_param->gpu_vector_width = get_gpu_vector_width (hash_mode, attack_kern, attack_exec, opti_type, vliw); - - /** - * catalyst driver check - */ - - if (catalyst_check == 1) - { - catalyst_warn = 1; - - // v14.9 and higher - if ((atoi (device_param->device_version) >= 1573) - && (atoi (device_param->driver_version) >= 1573)) - { - catalyst_warn = 0; - } - - /* - // v14.9 - if ((strstr (device_param->device_version, "1573.") != NULL) - && (strstr (device_param->driver_version, "1573.") != NULL)) - { - catalyst_warn = 0; - } - - // v14.12 -- version overlaps with v15.4 beta - if ((strstr (device_param->device_version, "1642.") != NULL) - && (strstr (device_param->driver_version, "1642.") != NULL)) - { - catalyst_broken = 1; - } - - // v15.4 (Beta, Windows only release) - if ((strstr 
(device_param->device_version, "1642.") != NULL) - && (strstr (device_param->driver_version, "1642.") != NULL)) - { - catalyst_warn = 0; - } - - // v15.5 (Release, Linux) - if ((strstr (device_param->device_version, "1702.") != NULL) - && (strstr (device_param->driver_version, "1702.") != NULL)) - { - catalyst_warn = 0; - } - - // v15.3 (Beta, Ubuntu repository release) - if ((strstr (device_param->device_version, "1729.") != NULL) - && (strstr (device_param->driver_version, "1729.") != NULL)) - { - catalyst_warn = 0; - } - */ - - catalyst_check = 0; - } - #endif - } - - #ifdef _OCL - if (catalyst_broken == 1) - { - log_error (""); - log_error ("ATTENTION! The installed GPU driver in your system is known to be broken!"); - log_error ("It will pass over cracked hashes and does not report them as cracked"); - log_error ("You are STRONGLY encouraged not to use it"); - log_error ("You can use --force to override this but do not post error reports if you do so"); - - return (-1); - } - - if (catalyst_warn == 1) - { - log_error (""); - log_error ("ATTENTION! 
Unsupported or incorrect installed GPU driver detected!"); - log_error ("You are STRONGLY encouraged to use the official supported GPU driver for good reasons"); - log_error ("See oclHashcat's homepage for official supported GPU drivers"); - #ifdef _WIN - log_error ("Also see: http://hashcat.net/wiki/doku.php?id=upgrading_amd_drivers_how_to"); - #endif - log_error ("You can use --force to override this but do not post error reports if you do so"); - - return (-1); - } - #endif - - uint gpu_blocks_all = 0; - - #ifdef _CUDA - for (uint device_id = 0; device_id < devices_cnt; device_id++) - { - /** - * host buffer - */ - - hc_device_param_t *device_param = &data.devices_param[device_id]; - - /** - * device properties - */ - - int sm_minor = device_param->sm_minor; - int sm_major = device_param->sm_major; - - uint gpu_processors = device_param->gpu_processors; - uint gpu_vector_width = device_param->gpu_vector_width; - - /** - * create context for each device - */ - - uint flags = 0; - - if (gpu_async == 0) flags |= CU_CTX_SCHED_BLOCKING_SYNC; - else flags |= CU_CTX_SCHED_SPIN; - - hc_cuCtxCreate (&device_param->context, flags, device_param->device); - - // does bad things hc_cuCtxSetCacheConfig (CU_FUNC_CACHE_PREFER_L1); - - /** - * create input buffers on device - */ - - uint gpu_threads = GPU_THREADS_NV; - - if (hash_mode == 1500) gpu_threads = 64; - if (hash_mode == 3000) gpu_threads = 64; - if (hash_mode == 3200) gpu_threads = 8; - if (hash_mode == 7500) gpu_threads = 64; - if (hash_mode == 8900) gpu_threads = 64; - if (hash_mode == 9000) gpu_threads = 8; - if (hash_mode == 9300) gpu_threads = 64; - if (hash_mode == 9700) gpu_threads = 64; - if (hash_mode == 9710) gpu_threads = 64; - if (hash_mode == 9800) gpu_threads = 64; - if (hash_mode == 9810) gpu_threads = 64; - if (hash_mode == 10400) gpu_threads = 64; - if (hash_mode == 10410) gpu_threads = 64; - if (hash_mode == 10500) gpu_threads = 64; - - uint gpu_power = gpu_processors * gpu_threads * gpu_accel; - uint 
gpu_blocks = gpu_power * gpu_vector_width; - - device_param->gpu_threads = gpu_threads; - device_param->gpu_power_user = gpu_power; - device_param->gpu_blocks_user = gpu_blocks; - - gpu_blocks_all += gpu_blocks; - - uint size_pws = gpu_power * sizeof (pw_t); - - uint size_tmps = 4; - - switch (hash_mode) - { - case 400: size_tmps = gpu_blocks * sizeof (phpass_tmp_t); break; - case 500: size_tmps = gpu_blocks * sizeof (md5crypt_tmp_t); break; - case 501: size_tmps = gpu_blocks * sizeof (md5crypt_tmp_t); break; - case 1600: size_tmps = gpu_blocks * sizeof (md5crypt_tmp_t); break; - case 1800: size_tmps = gpu_blocks * sizeof (sha512crypt_tmp_t); break; - case 2100: size_tmps = gpu_blocks * sizeof (dcc2_tmp_t); break; - case 2500: size_tmps = gpu_blocks * sizeof (wpa_tmp_t); break; - case 3200: size_tmps = gpu_blocks * sizeof (bcrypt_tmp_t); break; - case 5200: size_tmps = gpu_blocks * sizeof (pwsafe3_tmp_t); break; - case 5800: size_tmps = gpu_blocks * sizeof (androidpin_tmp_t); break; - case 6211: - case 6212: - case 6213: size_tmps = gpu_blocks * sizeof (tc_tmp_t); break; - case 6221: - case 6222: - case 6223: size_tmps = gpu_blocks * sizeof (tc64_tmp_t); break; - case 6231: - case 6232: - case 6233: size_tmps = gpu_blocks * sizeof (tc_tmp_t); break; - case 6241: - case 6242: - case 6243: size_tmps = gpu_blocks * sizeof (tc_tmp_t); break; - case 6300: size_tmps = gpu_blocks * sizeof (md5crypt_tmp_t); break; - case 6400: size_tmps = gpu_blocks * sizeof (sha256aix_tmp_t); break; - case 6500: size_tmps = gpu_blocks * sizeof (sha512aix_tmp_t); break; - case 6600: size_tmps = gpu_blocks * sizeof (agilekey_tmp_t); break; - case 6700: size_tmps = gpu_blocks * sizeof (sha1aix_tmp_t); break; - case 6800: size_tmps = gpu_blocks * sizeof (lastpass_tmp_t); break; - case 7100: size_tmps = gpu_blocks * sizeof (pbkdf2_sha512_tmp_t); break; - case 7200: size_tmps = gpu_blocks * sizeof (pbkdf2_sha512_tmp_t); break; - case 7400: size_tmps = gpu_blocks * sizeof (sha256crypt_tmp_t); 
break; - case 7900: size_tmps = gpu_blocks * sizeof (drupal7_tmp_t); break; - case 8200: size_tmps = gpu_blocks * sizeof (pbkdf2_sha512_tmp_t); break; - case 8800: size_tmps = gpu_blocks * sizeof (androidfde_tmp_t); break; - case 8900: size_tmps = gpu_blocks * sizeof (scrypt_tmp_t); break; - case 9000: size_tmps = gpu_blocks * sizeof (pwsafe2_tmp_t); break; - case 9100: size_tmps = gpu_blocks * sizeof (lotus8_tmp_t); break; - case 9200: size_tmps = gpu_blocks * sizeof (pbkdf2_sha256_tmp_t); break; - case 9300: size_tmps = gpu_blocks * sizeof (scrypt_tmp_t); break; - case 9400: size_tmps = gpu_blocks * sizeof (office2007_tmp_t); break; - case 9500: size_tmps = gpu_blocks * sizeof (office2010_tmp_t); break; - case 9600: size_tmps = gpu_blocks * sizeof (office2013_tmp_t); break; - case 10000: size_tmps = gpu_blocks * sizeof (pbkdf2_sha256_tmp_t); break; - case 10200: size_tmps = gpu_blocks * sizeof (cram_md5_t); break; - case 10300: size_tmps = gpu_blocks * sizeof (saph_sha1_tmp_t); break; - case 10500: size_tmps = gpu_blocks * sizeof (pdf14_tmp_t); break; - case 10700: size_tmps = gpu_blocks * sizeof (pdf17l8_tmp_t); break; - case 10900: size_tmps = gpu_blocks * sizeof (pbkdf2_sha256_tmp_t); break; - case 11300: size_tmps = gpu_blocks * sizeof (bitcoin_wallet_tmp_t); break; - case 11600: size_tmps = gpu_blocks * sizeof (seven_zip_tmp_t); break; - case 11900: size_tmps = gpu_blocks * sizeof (pbkdf2_md5_tmp_t); break; - case 12000: size_tmps = gpu_blocks * sizeof (pbkdf2_sha1_tmp_t); break; - case 12100: size_tmps = gpu_blocks * sizeof (pbkdf2_sha512_tmp_t); break; - case 12200: size_tmps = gpu_blocks * sizeof (ecryptfs_tmp_t); break; - case 12300: size_tmps = gpu_blocks * sizeof (oraclet_tmp_t); break; - case 12400: size_tmps = gpu_blocks * sizeof (bsdicrypt_tmp_t); break; - case 12500: size_tmps = gpu_blocks * sizeof (rar3_tmp_t); break; - case 12700: size_tmps = gpu_blocks * sizeof (mywallet_tmp_t); break; - case 12800: size_tmps = gpu_blocks * sizeof 
(pbkdf2_sha256_tmp_t); break; - }; - - uint size_hooks = 4; - - if ((opts_type & OPTS_TYPE_HOOK12) || (opts_type & OPTS_TYPE_HOOK23)) - { - // fill size_hook with correct size - } - - // we can optimize some stuff here... - - device_param->size_pws = size_pws; - device_param->size_tmps = size_tmps; - device_param->size_hooks = size_hooks; - - uint size_root_css = SP_PW_MAX * sizeof (cs_t); - uint size_markov_css = SP_PW_MAX * CHARSIZ * sizeof (cs_t); - - device_param->size_root_css = size_root_css; - device_param->size_markov_css = size_markov_css; - - uint size_results = GPU_THREADS_NV * sizeof (uint); - - device_param->size_results = size_results; - - uint size_rules = gpu_rules_cnt * sizeof (gpu_rule_t); - uint size_plains = digests_cnt * sizeof (plain_t); - uint size_salts = salts_cnt * sizeof (salt_t); - uint size_esalts = salts_cnt * esalt_size; - - device_param->size_plains = size_plains; - device_param->size_digests = size_digests; - device_param->size_shown = size_shown; - device_param->size_salts = size_salts; - - uint size_combs = GPU_COMBS * sizeof (comb_t); - uint size_bfs = GPU_BFS * sizeof (bf_t); - uint size_tm = 32 * sizeof (bs_word_t); - - uint64_t size_scryptV = 1; - - if ((hash_mode == 8900) || (hash_mode == 9300)) - { - #define SHADER_PER_MP 32 - #define WARPS 32 - - uint tmto_start = 2; - uint tmto_stop = 1024; - - if (scrypt_tmto) - { - tmto_start = 1 << scrypt_tmto; - tmto_stop = tmto_start + 1; - } - - for (uint tmto = tmto_start; tmto < tmto_stop; tmto <<= 1) - { - // todo -- make sure all salts get the new tmto value - - size_scryptV = (128 * data.salts_buf[0].scrypt_r) * data.salts_buf[0].scrypt_N; - - size_scryptV /= tmto; - - size_scryptV *= gpu_processors * WARPS * SHADER_PER_MP; - - if (size_scryptV > (device_param->gpu_maxmem_alloc / 2)) continue; - - for (uint salts_pos = 0; salts_pos < data.salts_cnt; salts_pos++) - { - data.salts_buf[salts_pos].scrypt_tmto = tmto; - data.salts_buf[salts_pos].scrypt_phy = gpu_processors * WARPS * 
SHADER_PER_MP; - } - - break; - } - - if (data.salts_buf[0].scrypt_tmto == 0) - { - log_error ("ERROR: can't allocate enough GPU memory"); - - return -1; - } - - if (quiet == 0) log_info (""); - if (quiet == 0) log_info ("SCRYPT tmto optimizer value set to: %u\n", data.salts_buf[0].scrypt_tmto); - } - - /** - * stream - */ - - hc_cuStreamCreate (&device_param->stream, 0); - - /** - * In theory we'd need a real JIT solution as we have it with OpenCL, but CUDA does not provide such a feature, what a shame! - * There's NVRTC library which is able to compile sourcecode to PTX which we could use, but for some unknown reason this works only for 64 bit - * There's also the problem that the user needs to install the CUDA SDK to get this to work. - */ - - force_jit_compilation = 0; - - /** - * module find - */ - - struct stat st; - - char module_file[256]; - - memset (module_file, 0, sizeof (module_file)); - - #ifdef BINARY_KERNEL - - if (force_jit_compilation == 0) - { - #ifdef __x86_64__ - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a0.sm_%d%d.64.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a1.sm_%d%d.64.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - else if (attack_kern == ATTACK_KERN_BF) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a3.sm_%d%d.64.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - } - else - { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d.sm_%d%d.64.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - - if ((hash_mode == 8900) || (hash_mode == 9300)) - { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_%d_%d_%d_%d.sm_%d%d.64.cubin", install_dir, (int) kern_type, data.salts_buf[0].scrypt_N, 
data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto, sm_major, sm_minor); - } - } - - #else - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a0.sm_%d%d.32.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a1.sm_%d%d.32.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - else if (attack_kern == ATTACK_KERN_BF) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_a3.sm_%d%d.32.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - } - else - { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d.sm_%d%d.32.cubin", install_dir, (int) kern_type, sm_major, sm_minor); - - if ((hash_mode == 8900) || (hash_mode == 9300)) - { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4318/m%05d_%d_%d_%d_%d.sm_%d%d.32.cubin", install_dir, (int) kern_type, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto, sm_major, sm_minor); - } - } - - #endif - } - else - { - generate_source_kernel_filename (attack_exec, attack_kern, kern_type, install_dir, module_file); - - if (stat (module_file, &st) == -1) - { - log_error ("ERROR: %s: %s", module_file, strerror (errno)); - - return -1; - } - } - - #else - - generate_source_kernel_filename (attack_exec, attack_kern, kern_type, install_dir, module_file); - - if (stat (module_file, &st) == -1) - { - log_error ("ERROR: %s: %s", module_file, strerror (errno)); - - return -1; - } - - #endif - - char module_mp_file[256]; - - memset (module_mp_file, 0, sizeof (module_mp_file)); - - if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) - { - #ifdef __x86_64__ - snprintf (module_mp_file, sizeof (module_mp_file) - 1, 
"%s/kernels/4318/markov_be_v%d.sm_%d%d.64.cubin", install_dir, gpu_vector_width, sm_major, sm_minor); - #else - snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4318/markov_be_v%d.sm_%d%d.32.cubin", install_dir, gpu_vector_width, sm_major, sm_minor); - #endif - } - else - { - #ifdef __x86_64__ - snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4318/markov_le_v%d.sm_%d%d.64.cubin", install_dir, gpu_vector_width, sm_major, sm_minor); - #else - snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4318/markov_le_v%d.sm_%d%d.32.cubin", install_dir, gpu_vector_width, sm_major, sm_minor); - #endif - } - - char module_amp_file[256]; - - memset (module_amp_file, 0, sizeof (module_amp_file)); - - #ifdef __x86_64__ - snprintf (module_amp_file, sizeof (module_amp_file) - 1, "%s/kernels/4318/amp_a%d_v%d.sm_%d%d.64.cubin", install_dir, attack_kern, gpu_vector_width, sm_major, sm_minor); - #else - snprintf (module_amp_file, sizeof (module_amp_file) - 1, "%s/kernels/4318/amp_a%d_v%d.sm_%d%d.32.cubin", install_dir, attack_kern, gpu_vector_width, sm_major, sm_minor); - #endif - - /** - * module load - */ - - hc_cuModuleLoad (&device_param->module, module_file); - - if (quiet == 0) log_info ("Device #%u: Kernel %s", device_id + 1, module_file); - - if (attack_mode != ATTACK_MODE_STRAIGHT) - { - hc_cuModuleLoad (&device_param->module_mp, module_mp_file); - - if (quiet == 0) log_info ("Device #%u: Kernel %s", device_id + 1, module_mp_file); - } - - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - // nothing to do - } - else - { - hc_cuModuleLoad (&device_param->module_amp, module_amp_file); - - if (quiet == 0) log_info ("Device #%u: Kernel %s", device_id + 1, module_amp_file); - } - - /** - * module functions - */ - - char module_name[64]; - - memset (module_name, 0, sizeof (module_name)); - - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - if (opti_type & OPTI_TYPE_SINGLE_HASH) - { - snprintf (module_name, sizeof (module_name) - 1, 
"m%05d_s%02d", kern_type, 4); - - hc_cuModuleGetFunction (&device_param->function1, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_s%02d", kern_type, 8); - - hc_cuModuleGetFunction (&device_param->function2, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_s%02d", kern_type, 16); - - hc_cuModuleGetFunction (&device_param->function3, device_param->module, module_name); - } - else - { - snprintf (module_name, sizeof (module_name) - 1, "m%05d_m%02d", kern_type, 4); - - hc_cuModuleGetFunction (&device_param->function1, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_m%02d", kern_type, 8); - - hc_cuModuleGetFunction (&device_param->function2, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_m%02d", kern_type, 16); - - hc_cuModuleGetFunction (&device_param->function3, device_param->module, module_name); - } - - if (attack_mode == ATTACK_MODE_BF) - { - if (opts_type & OPTS_TYPE_PT_BITSLICE) - { - snprintf (module_name, sizeof (module_name) - 1, "m%05d_tb", kern_type); - - hc_cuModuleGetFunction (&device_param->function_tb, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_tm", kern_type); - - hc_cuModuleGetFunction (&device_param->function_tm, device_param->module, module_name); - } - } - } - else - { - snprintf (module_name, sizeof (module_name) - 1, "m%05d_init", kern_type); - - hc_cuModuleGetFunction (&device_param->function1, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_loop", kern_type); - - hc_cuModuleGetFunction (&device_param->function2, device_param->module, module_name); - - snprintf (module_name, sizeof (module_name) - 1, "m%05d_comp", kern_type); - - hc_cuModuleGetFunction (&device_param->function3, device_param->module, module_name); - - if (opts_type & OPTS_TYPE_HOOK12) - { - 
snprintf (module_name, sizeof (module_name) - 1, "m%05d_hook12", kern_type); - - hc_cuModuleGetFunction (&device_param->function12, device_param->module, module_name); - } - - if (opts_type & OPTS_TYPE_HOOK23) - { - snprintf (module_name, sizeof (module_name) - 1, "m%05d_hook23", kern_type); - - hc_cuModuleGetFunction (&device_param->function23, device_param->module, module_name); - } - } - - if (attack_mode == ATTACK_MODE_BF) - { - hc_cuModuleGetFunction (&device_param->function_mp_l, device_param->module_mp, "l_markov"); - hc_cuModuleGetFunction (&device_param->function_mp_r, device_param->module_mp, "r_markov"); - } - else if (attack_mode == ATTACK_MODE_HYBRID1) - { - hc_cuModuleGetFunction (&device_param->function_mp, device_param->module_mp, "C_markov"); - } - else if (attack_mode == ATTACK_MODE_HYBRID2) - { - hc_cuModuleGetFunction (&device_param->function_mp, device_param->module_mp, "C_markov"); - } - - /** - * amplifiers are not independant - */ - - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - // nothing to do - } - else - { - hc_cuModuleGetFunction (&device_param->function_amp, device_param->module_amp, "amp"); - } - - /** - * global buffers - */ - - hc_cuMemAlloc (&device_param->d_pws_buf, size_pws); - hc_cuMemAlloc (&device_param->d_pws_amp_buf, size_pws); - hc_cuMemAlloc (&device_param->d_tmps, size_tmps); - hc_cuMemAlloc (&device_param->d_hooks, size_hooks); - hc_cuMemAlloc (&device_param->d_bitmap_s1_a, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s1_b, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s1_c, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s1_d, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s2_a, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s2_b, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s2_c, bitmap_size); - hc_cuMemAlloc (&device_param->d_bitmap_s2_d, bitmap_size); - hc_cuMemAlloc (&device_param->d_plain_bufs, size_plains); - hc_cuMemAlloc (&device_param->d_digests_buf, 
size_digests); - hc_cuMemAlloc (&device_param->d_digests_shown, size_shown); - hc_cuMemAlloc (&device_param->d_salt_bufs, size_salts); - hc_cuMemAlloc (&device_param->d_result, size_results); - hc_cuMemAlloc (&device_param->d_scryptV_buf, size_scryptV); - - hc_cuMemcpyHtoD (device_param->d_bitmap_s1_a, bitmap_s1_a, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s1_b, bitmap_s1_b, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s1_c, bitmap_s1_c, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s1_d, bitmap_s1_d, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s2_a, bitmap_s2_a, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s2_b, bitmap_s2_b, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s2_c, bitmap_s2_c, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_bitmap_s2_d, bitmap_s2_d, bitmap_size); - hc_cuMemcpyHtoD (device_param->d_digests_buf, data.digests_buf, size_digests); - hc_cuMemcpyHtoD (device_param->d_digests_shown, data.digests_shown, size_shown); - hc_cuMemcpyHtoD (device_param->d_salt_bufs, data.salts_buf, size_salts); - - run_kernel_bzero (device_param, device_param->d_pws_buf, size_pws); - run_kernel_bzero (device_param, device_param->d_pws_amp_buf, size_pws); - run_kernel_bzero (device_param, device_param->d_tmps, size_tmps); - run_kernel_bzero (device_param, device_param->d_hooks, size_hooks); - run_kernel_bzero (device_param, device_param->d_plain_bufs, size_plains); - run_kernel_bzero (device_param, device_param->d_result, size_results); - - /** - * special buffers - */ - - if (attack_kern == ATTACK_KERN_STRAIGHT) - { - hc_cuMemAlloc (&device_param->d_rules, size_rules); - - hc_cuMemcpyHtoD (device_param->d_rules, gpu_rules_buf, size_rules); - } - else if (attack_kern == ATTACK_KERN_COMBI) - { - hc_cuMemAlloc (&device_param->d_combs, size_combs); - hc_cuMemAlloc (&device_param->d_root_css_buf, size_root_css); - hc_cuMemAlloc (&device_param->d_markov_css_buf, size_markov_css); - - run_kernel_bzero 
(device_param, device_param->d_combs, size_combs); - run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css); - run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css); - } - else if (attack_kern == ATTACK_KERN_BF) - { - hc_cuMemAlloc (&device_param->d_bfs, size_bfs); - hc_cuMemAlloc (&device_param->d_tm, size_tm); - hc_cuMemAlloc (&device_param->d_root_css_buf, size_root_css); - hc_cuMemAlloc (&device_param->d_markov_css_buf, size_markov_css); - - run_kernel_bzero (device_param, device_param->d_bfs, size_bfs); - run_kernel_bzero (device_param, device_param->d_tm, size_tm); - run_kernel_bzero (device_param, device_param->d_root_css_buf, size_root_css); - run_kernel_bzero (device_param, device_param->d_markov_css_buf, size_markov_css); - } - - if (size_esalts) - { - hc_cuMemAlloc (&device_param->d_esalt_bufs, size_esalts); - - hc_cuMemcpyHtoD (device_param->d_esalt_bufs, data.esalts_buf, size_esalts); - } - - /** - * main host data - */ - - uint *result = (uint *) mymalloc (size_results); - - memset (result, 0, size_results); - - device_param->result = result; - - pw_t *pws_buf = (pw_t *) mymalloc (size_pws); - - memset (pws_buf, 0, size_pws); - - device_param->pws_buf = pws_buf; - - pw_cache_t *pw_caches = (pw_cache_t *) mycalloc (64, sizeof (pw_cache_t)); - - for (int i = 0; i < 64; i++) - { - pw_caches[i].pw_buf.pw_len = i; - pw_caches[i].cnt = 0; - } - - device_param->pw_caches = pw_caches; - - comb_t *combs_buf = (comb_t *) mycalloc (GPU_COMBS, sizeof (comb_t)); - - device_param->combs_buf = combs_buf; + if (vendor_id == VENDOR_ID_NV) + { + cl_uint sm_minor = 0; + cl_uint sm_major = 0; - void *hooks_buf = mymalloc (size_hooks); + #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 + #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 - device_param->hooks_buf = hooks_buf; + hc_clGetDeviceInfo (device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof (sm_minor), &sm_minor, NULL); + hc_clGetDeviceInfo (device, 
CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof (sm_major), &sm_major, NULL); - switch (device_param->gpu_vector_width) - { - case 1: device_param->pw_transpose = pw_transpose_to_hi1; - device_param->pw_add = pw_add_to_hc1; - break; - case 2: device_param->pw_transpose = pw_transpose_to_hi2; - device_param->pw_add = pw_add_to_hc2; - break; - case 4: device_param->pw_transpose = pw_transpose_to_hi4; - device_param->pw_add = pw_add_to_hc4; - break; + device_param->sm_minor = sm_minor; + device_param->sm_major = sm_major; } /** - * module args + * catalyst driver check */ - device_param->kernel_params_buf32[21] = bitmap_mask; - device_param->kernel_params_buf32[22] = bitmap_shift1; - device_param->kernel_params_buf32[23] = bitmap_shift2; - device_param->kernel_params_buf32[24] = 0; // salt_pos - device_param->kernel_params_buf32[25] = 0; // loop_pos - device_param->kernel_params_buf32[26] = 0; // loop_cnt - device_param->kernel_params_buf32[27] = 0; // gpu_rules_cnt - device_param->kernel_params_buf32[28] = 0; // digests_cnt - device_param->kernel_params_buf32[29] = 0; // digests_offset - device_param->kernel_params_buf32[30] = 0; // combs_mode - device_param->kernel_params_buf32[31] = 0; // gid_max + if (vendor_id == VENDOR_ID_AMD) + { + int catalyst_check = (force == 1) ? 0 : 1; - device_param->kernel_params[ 0] = (attack_exec == ATTACK_EXEC_ON_GPU) - ? 
&device_param->d_pws_buf - : &device_param->d_pws_amp_buf; - device_param->kernel_params[ 1] = &device_param->d_rules; - device_param->kernel_params[ 2] = &device_param->d_combs; - device_param->kernel_params[ 3] = &device_param->d_bfs; - device_param->kernel_params[ 4] = &device_param->d_tmps; - device_param->kernel_params[ 5] = &device_param->d_hooks; - device_param->kernel_params[ 6] = &device_param->d_bitmap_s1_a; - device_param->kernel_params[ 7] = &device_param->d_bitmap_s1_b; - device_param->kernel_params[ 8] = &device_param->d_bitmap_s1_c; - device_param->kernel_params[ 9] = &device_param->d_bitmap_s1_d; - device_param->kernel_params[10] = &device_param->d_bitmap_s2_a; - device_param->kernel_params[11] = &device_param->d_bitmap_s2_b; - device_param->kernel_params[12] = &device_param->d_bitmap_s2_c; - device_param->kernel_params[13] = &device_param->d_bitmap_s2_d; - device_param->kernel_params[14] = &device_param->d_plain_bufs; - device_param->kernel_params[15] = &device_param->d_digests_buf; - device_param->kernel_params[16] = &device_param->d_digests_shown; - device_param->kernel_params[17] = &device_param->d_salt_bufs; - device_param->kernel_params[18] = &device_param->d_esalt_bufs; - device_param->kernel_params[19] = &device_param->d_result; - device_param->kernel_params[20] = &device_param->d_scryptV_buf; - device_param->kernel_params[21] = &device_param->kernel_params_buf32[21]; - device_param->kernel_params[22] = &device_param->kernel_params_buf32[22]; - device_param->kernel_params[23] = &device_param->kernel_params_buf32[23]; - device_param->kernel_params[24] = &device_param->kernel_params_buf32[24]; - device_param->kernel_params[25] = &device_param->kernel_params_buf32[25]; - device_param->kernel_params[26] = &device_param->kernel_params_buf32[26]; - device_param->kernel_params[27] = &device_param->kernel_params_buf32[27]; - device_param->kernel_params[28] = &device_param->kernel_params_buf32[28]; - device_param->kernel_params[29] = 
&device_param->kernel_params_buf32[29]; - device_param->kernel_params[30] = &device_param->kernel_params_buf32[30]; - device_param->kernel_params[31] = &device_param->kernel_params_buf32[31]; + int catalyst_warn = 0; - device_param->kernel_params_mp_buf64[3] = 0; - device_param->kernel_params_mp_buf32[4] = 0; - device_param->kernel_params_mp_buf32[5] = 0; - device_param->kernel_params_mp_buf32[6] = 0; - device_param->kernel_params_mp_buf32[7] = 0; - device_param->kernel_params_mp_buf32[8] = 0; + int catalyst_broken = 0; - device_param->kernel_params_mp[0] = NULL; - device_param->kernel_params_mp[1] = NULL; - device_param->kernel_params_mp[2] = NULL; - device_param->kernel_params_mp[3] = &device_param->kernel_params_mp_buf64[3]; - device_param->kernel_params_mp[4] = &device_param->kernel_params_mp_buf32[4]; - device_param->kernel_params_mp[5] = &device_param->kernel_params_mp_buf32[5]; - device_param->kernel_params_mp[6] = &device_param->kernel_params_mp_buf32[6]; - device_param->kernel_params_mp[7] = &device_param->kernel_params_mp_buf32[7]; - device_param->kernel_params_mp[8] = &device_param->kernel_params_mp_buf32[8]; + if (catalyst_check == 1) + { + catalyst_warn = 1; - device_param->kernel_params_mp_l_buf64[3] = 0; - device_param->kernel_params_mp_l_buf32[4] = 0; - device_param->kernel_params_mp_l_buf32[5] = 0; - device_param->kernel_params_mp_l_buf32[6] = 0; - device_param->kernel_params_mp_l_buf32[7] = 0; - device_param->kernel_params_mp_l_buf32[8] = 0; - device_param->kernel_params_mp_l_buf32[9] = 0; + // v14.9 and higher + if ((atoi (device_param->device_version) >= 1573) + && (atoi (device_param->driver_version) >= 1573)) + { + catalyst_warn = 0; + } - device_param->kernel_params_mp_l[0] = NULL; - device_param->kernel_params_mp_l[1] = NULL; - device_param->kernel_params_mp_l[2] = NULL; - device_param->kernel_params_mp_l[3] = &device_param->kernel_params_mp_l_buf64[3]; - device_param->kernel_params_mp_l[4] = &device_param->kernel_params_mp_l_buf32[4]; - 
device_param->kernel_params_mp_l[5] = &device_param->kernel_params_mp_l_buf32[5]; - device_param->kernel_params_mp_l[6] = &device_param->kernel_params_mp_l_buf32[6]; - device_param->kernel_params_mp_l[7] = &device_param->kernel_params_mp_l_buf32[7]; - device_param->kernel_params_mp_l[8] = &device_param->kernel_params_mp_l_buf32[8]; - device_param->kernel_params_mp_l[9] = &device_param->kernel_params_mp_l_buf32[9]; + /* + // v14.9 + if ((strstr (device_param->device_version, "1573.") != NULL) + && (strstr (device_param->driver_version, "1573.") != NULL)) + { + catalyst_warn = 0; + } - device_param->kernel_params_mp_r_buf64[3] = 0; - device_param->kernel_params_mp_r_buf32[4] = 0; - device_param->kernel_params_mp_r_buf32[5] = 0; - device_param->kernel_params_mp_r_buf32[6] = 0; - device_param->kernel_params_mp_r_buf32[7] = 0; - device_param->kernel_params_mp_r_buf32[8] = 0; + // v14.12 -- version overlaps with v15.4 beta + if ((strstr (device_param->device_version, "1642.") != NULL) + && (strstr (device_param->driver_version, "1642.") != NULL)) + { + catalyst_broken = 1; + } - device_param->kernel_params_mp_r[0] = NULL; - device_param->kernel_params_mp_r[1] = NULL; - device_param->kernel_params_mp_r[2] = NULL; - device_param->kernel_params_mp_r[3] = &device_param->kernel_params_mp_r_buf64[3]; - device_param->kernel_params_mp_r[4] = &device_param->kernel_params_mp_r_buf32[4]; - device_param->kernel_params_mp_r[5] = &device_param->kernel_params_mp_r_buf32[5]; - device_param->kernel_params_mp_r[6] = &device_param->kernel_params_mp_r_buf32[6]; - device_param->kernel_params_mp_r[7] = &device_param->kernel_params_mp_r_buf32[7]; - device_param->kernel_params_mp_r[8] = &device_param->kernel_params_mp_r_buf32[8]; + // v15.4 (Beta, Windows only release) + if ((strstr (device_param->device_version, "1642.") != NULL) + && (strstr (device_param->driver_version, "1642.") != NULL)) + { + catalyst_warn = 0; + } - device_param->kernel_params_amp_buf32[5] = 0; // combs_mode - 
device_param->kernel_params_amp_buf32[6] = 0; // gid_max + // v15.5 (Release, Linux) + if ((strstr (device_param->device_version, "1702.") != NULL) + && (strstr (device_param->driver_version, "1702.") != NULL)) + { + catalyst_warn = 0; + } - device_param->kernel_params_amp[0] = &device_param->d_pws_buf; - device_param->kernel_params_amp[1] = &device_param->d_pws_amp_buf; - device_param->kernel_params_amp[2] = &device_param->d_rules; - device_param->kernel_params_amp[3] = &device_param->d_combs; - device_param->kernel_params_amp[4] = &device_param->d_bfs; - device_param->kernel_params_amp[5] = &device_param->kernel_params_amp_buf32[5]; - device_param->kernel_params_amp[6] = &device_param->kernel_params_amp_buf32[6]; + // v15.3 (Beta, Ubuntu repository release) + if ((strstr (device_param->device_version, "1729.") != NULL) + && (strstr (device_param->driver_version, "1729.") != NULL)) + { + catalyst_warn = 0; + } + */ - device_param->kernel_params_tb[0] = &device_param->d_pws_buf; + catalyst_check = 0; + } - device_param->kernel_params_tm[0] = &device_param->d_bfs; - device_param->kernel_params_tm[1] = &device_param->d_tm; + if (catalyst_broken == 1) + { + log_error (""); + log_error ("ATTENTION! The installed GPU driver in your system is known to be broken!"); + log_error ("It will pass over cracked hashes and does not report them as cracked"); + log_error ("You are STRONGLY encouraged not to use it"); + log_error ("You can use --force to override this but do not post error reports if you do so"); - /* constant memory init */ + return (-1); + } - CUmodule c_module; + if (catalyst_warn == 1) + { + log_error (""); + log_error ("ATTENTION! 
Unsupported or incorrect installed GPU driver detected!"); + log_error ("You are STRONGLY encouraged to use the official supported GPU driver for good reasons"); + log_error ("See oclHashcat's homepage for official supported GPU drivers"); + #ifdef _WIN + log_error ("Also see: http://hashcat.net/wiki/doku.php?id=upgrading_amd_drivers_how_to"); + #endif + log_error ("You can use --force to override this but do not post error reports if you do so"); - if (attack_exec == ATTACK_EXEC_ON_GPU) - { - c_module = device_param->module; - } - else - { - c_module = device_param->module_amp; + return (-1); + } } + } - size_t c_bytes; - - if (attack_kern == ATTACK_KERN_STRAIGHT) - { - CUdeviceptr c_rules; - - hc_cuModuleGetGlobal (&c_rules, &c_bytes, c_module, "c_rules"); + /* + * Temporary fix: + * with AMD r9 295x cards it seems that we need to set the powertune value just AFTER the ocl init stuff + * otherwise after hc_clCreateContext () etc, powertune value was set back to "normal" and cards unfortunately + * were not working @ full speed (setting hc_ADL_Overdrive_PowerControl_Set () here seems to fix the problem) + * Driver / ADL bug? 
+ * - device_param->c_rules = c_rules; - device_param->c_bytes = c_bytes; - hc_cuMemsetD8 (c_rules, 0, c_bytes); - } - else if (attack_kern == ATTACK_KERN_COMBI) + if (vendor_id == VENDOR_ID_AMD) + { + if (powertune_enable == 1) { - CUdeviceptr c_combs; - - hc_cuModuleGetGlobal (&c_combs, &c_bytes, c_module, "c_combs"); + hc_thread_mutex_lock (mux_adl); - device_param->c_combs = c_combs; - device_param->c_bytes = c_bytes; + for (uint i = 0; i < devices_cnt; i++) + { + if (data.hm_device[i].od_version == 6) + { + // set powertune value only - hc_cuMemsetD8 (c_combs, 0, c_bytes); - } - else if (attack_kern == ATTACK_KERN_BF) - { - CUdeviceptr c_bfs; + int powertune_supported = 0; - hc_cuModuleGetGlobal (&c_bfs, &c_bytes, c_module, "c_bfs"); + int ADL_rc = 0; - device_param->c_bfs = c_bfs; - device_param->c_bytes = c_bytes; + if ((ADL_rc = hc_ADL_Overdrive6_PowerControl_Caps (data.hm_dll, data.hm_device[i].adapter_index, &powertune_supported)) != ADL_OK) + { + log_error ("ERROR: Failed to get ADL PowerControl Capabilities"); - hc_cuMemsetD8 (c_bfs, 0, c_bytes); + return (-1); + } - if (data.opts_type & OPTS_TYPE_PT_BITSLICE) - { - size_t bytes; + if (powertune_supported != 0) + { + // powertune set + ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0}; - CUdeviceptr c_tm; + if ((ADL_rc = hc_ADL_Overdrive_PowerControlInfo_Get (data.hm_dll, data.hm_device[i].adapter_index, &powertune)) != ADL_OK) + { + log_error ("ERROR: Failed to get current ADL PowerControl settings"); - hc_cuModuleGetGlobal (&c_tm, &bytes, c_module, "c_tm"); + return (-1); + } - device_param->c_tm = c_tm; + if ((ADL_rc = hc_ADL_Overdrive_PowerControl_Set (data.hm_dll, data.hm_device[i].adapter_index, powertune.iMaxValue)) != ADL_OK) + { + log_error ("ERROR: Failed to set new ADL PowerControl values"); - hc_cuMemsetD8 (c_tm, 0, bytes); + return (-1); + } + } + } } - } - hc_cuCtxPopCurrent (NULL); + hc_thread_mutex_unlock (mux_adl); + } } + */ + + uint gpu_blocks_all = 0; - #elif _OCL for (uint 
device_id = 0; device_id < devices_cnt; device_id++) { /** @@ -14523,7 +12728,6 @@ int main (int argc, char **argv) char *driver_version = device_param->driver_version; uint gpu_processors = device_param->gpu_processors; - uint gpu_vector_width = device_param->gpu_vector_width; /** * create context for each device @@ -14535,20 +12739,22 @@ int main (int argc, char **argv) * create command-queue */ - device_param->command_queue = hc_clCreateCommandQueueWithProperties (device_param->context, device_param->device, NULL); + // not support with NV + // device_param->command_queue = hc_clCreateCommandQueueWithProperties (device_param->context, device_param->device, NULL); + + device_param->command_queue = hc_clCreateCommandQueue (device_param->context, device_param->device, 0); /** * create input buffers on device */ - uint gpu_threads = GPU_THREADS_AMD; + uint gpu_threads = GPU_THREADS; if (hash_mode == 3200) gpu_threads = 8; if (hash_mode == 9000) gpu_threads = 8; uint gpu_power = gpu_processors * gpu_threads * gpu_accel; - - uint gpu_blocks = gpu_power * gpu_vector_width; + uint gpu_blocks = gpu_power; device_param->gpu_threads = gpu_threads; device_param->gpu_power_user = gpu_power; @@ -14642,7 +12848,7 @@ int main (int argc, char **argv) device_param->size_root_css = size_root_css; device_param->size_markov_css = size_markov_css; - uint size_results = GPU_THREADS_AMD * sizeof (uint); + uint size_results = GPU_THREADS * sizeof (uint); device_param->size_results = size_results; @@ -14713,7 +12919,11 @@ int main (int argc, char **argv) * kernel find */ - uint vliw = get_vliw_by_device_name (device_name); + char build_opts[100]; + + // we don't have sm_* on AMD but it doesn't matter + + sprintf (build_opts, "-I. 
-IOpenCL/ -DVENDOR_ID=%d -DCUDA_ARCH=%d", vendor_id, (device_param->sm_major * 100) + device_param->sm_minor); struct stat st; @@ -14758,19 +12968,19 @@ int main (int argc, char **argv) if (attack_exec == ATTACK_EXEC_ON_GPU) { if (attack_kern == ATTACK_KERN_STRAIGHT) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a0.VLIW%d.llvmir", install_dir, (int) kern_type, vliw); + snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a0.llvmir", install_dir, (int) kern_type); else if (attack_kern == ATTACK_KERN_COMBI) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a1.VLIW%d.llvmir", install_dir, (int) kern_type, vliw); + snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a1.llvmir", install_dir, (int) kern_type); else if (attack_kern == ATTACK_KERN_BF) - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a3.VLIW%d.llvmir", install_dir, (int) kern_type, vliw); + snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_a3.llvmir", install_dir, (int) kern_type); } else { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d.VLIW%d.llvmir", install_dir, (int) kern_type, vliw); + snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d.llvmir", install_dir, (int) kern_type); if ((hash_mode == 8900) || (hash_mode == 9300)) { - snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_%d_%d_%d_%d.VLIW%d.llvmir", install_dir, (int) kern_type, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto, vliw); + snprintf (module_file, sizeof (module_file) - 1, "%s/kernels/4098/m%05d_%d_%d_%d_%d.llvmir", install_dir, (int) kern_type, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto); } } @@ -14780,7 +12990,7 @@ int main (int argc, char **argv) local_free (kernel_sources[0]); - hc_clBuildProgram 
(program, 1, &device_param->device, "-cl-std=CL1.2", NULL, NULL); + hc_clBuildProgram (program, 1, &device_param->device, build_opts, NULL, NULL); size_t binary_size; @@ -14862,11 +13072,11 @@ int main (int argc, char **argv) #ifdef BINARY_KERNEL if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) { - snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/kernels/4098/markov_be_v%d.%s_%s_%s_%d.kernel", install_dir, gpu_vector_width, device_name, device_version, driver_version, COMPTIME); + snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/kernels/4098/markov_be.%s_%s_%s_%d.kernel", install_dir, device_name, device_version, driver_version, COMPTIME); } else { - snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/kernels/4098/markov_le_v%d.%s_%s_%s_%d.kernel", install_dir, gpu_vector_width, device_name, device_version, driver_version, COMPTIME); + snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/kernels/4098/markov_le.%s_%s_%s_%d.kernel", install_dir, device_name, device_version, driver_version, COMPTIME); } if (stat (kernel_mp_file, &st) == -1) @@ -14879,11 +13089,11 @@ int main (int argc, char **argv) if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) { - snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4098/markov_be_v%d.llvmir", install_dir, gpu_vector_width); + snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4098/markov_be.llvmir", install_dir); } else { - snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4098/markov_le_v%d.llvmir", install_dir, gpu_vector_width); + snprintf (module_mp_file, sizeof (module_mp_file) - 1, "%s/kernels/4098/markov_le.llvmir", install_dir); } load_kernel (module_mp_file, 1, kernel_mp_lengths, kernel_mp_sources); @@ -14892,7 +13102,7 @@ int main (int argc, char **argv) local_free (kernel_mp_sources[0]); - hc_clBuildProgram (program_mp, 1, &device_param->device, "-cl-std=CL1.2", NULL, NULL); 
+ hc_clBuildProgram (program_mp, 1, &device_param->device, build_opts, NULL, NULL); size_t binary_mp_size; @@ -14912,11 +13122,11 @@ int main (int argc, char **argv) #else if ((opti_type & OPTI_TYPE_BRUTE_FORCE) && (opts_type & OPTS_TYPE_PT_GENERATE_BE)) { - snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/amd/markov_be_v%d.cl", install_dir, gpu_vector_width); + snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/OpenCL/markov_be.cl", install_dir); } else { - snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/amd/markov_le_v%d.cl", install_dir, gpu_vector_width); + snprintf (kernel_mp_file, sizeof (kernel_mp_file) - 1, "%s/OpenCL/markov_le.cl", install_dir); } if (stat (kernel_mp_file, &st) == -1) @@ -14964,7 +13174,7 @@ int main (int argc, char **argv) const unsigned char **kernel_amp_sources = (const unsigned char **) mymalloc (sizeof (unsigned char *)); #ifdef BINARY_KERNEL - snprintf (kernel_amp_file, sizeof (kernel_amp_file) - 1, "%s/kernels/4098/amp_a%d_v%d.%s_%s_%s_%d.kernel", install_dir, attack_kern, gpu_vector_width, device_name, device_version, driver_version, COMPTIME); + snprintf (kernel_amp_file, sizeof (kernel_amp_file) - 1, "%s/kernels/4098/amp_a%d.%s_%s_%s_%d.kernel", install_dir, attack_kern, device_name, device_version, driver_version, COMPTIME); if (stat (kernel_amp_file, &st) == -1) { @@ -14974,7 +13184,7 @@ int main (int argc, char **argv) memset (module_amp_file, 0, sizeof (module_amp_file)); - snprintf (module_amp_file, sizeof (module_amp_file) - 1, "%s/kernels/4098/amp_a%d_v%d.llvmir", install_dir, attack_kern, gpu_vector_width); + snprintf (module_amp_file, sizeof (module_amp_file) - 1, "%s/kernels/4098/amp_a%d.llvmir", install_dir, attack_kern); load_kernel (module_amp_file, 1, kernel_amp_lengths, kernel_amp_sources); @@ -14982,7 +13192,7 @@ int main (int argc, char **argv) local_free (kernel_amp_sources[0]); - hc_clBuildProgram (program_amp, 1, &device_param->device, "-cl-std=CL1.2", NULL, NULL); + 
hc_clBuildProgram (program_amp, 1, &device_param->device, build_opts, NULL, NULL); size_t binary_amp_size; @@ -14999,7 +13209,7 @@ int main (int argc, char **argv) stat (kernel_amp_file, &st); // to reload filesize } #else - snprintf (kernel_amp_file, sizeof (kernel_amp_file) - 1, "%s/amd/amp_a%d_v%d.cl", install_dir, attack_kern, gpu_vector_width); + snprintf (kernel_amp_file, sizeof (kernel_amp_file) - 1, "%s/OpenCL/amp_a%d.cl", install_dir, attack_kern); if (stat (kernel_amp_file, &st) == -1) { @@ -15030,8 +13240,6 @@ int main (int argc, char **argv) * kernel compile */ - char *build_opts = NULL; - #ifdef BINARY_KERNEL if (force_jit_compilation == 0) @@ -15040,23 +13248,13 @@ int main (int argc, char **argv) } else if (force_jit_compilation == 1500) { - build_opts = (char *) mymalloc (256); - - sprintf (build_opts, "-I . -I amd/ -D VLIW%d -x clc++ -cl-std=CL1.2 -DDESCRYPT_SALT=%d", vliw, data.salts_buf[0].salt_buf[0]); + sprintf (build_opts, "%s -DDESCRYPT_SALT=%d", build_opts, data.salts_buf[0].salt_buf[0]); } else if (force_jit_compilation == 8900) { - build_opts = (char *) mymalloc (256); - - sprintf (build_opts, "-I . -I amd/ -D VLIW%d -x clc++ -cl-std=CL1.2 -DSCRYPT_N=%d -DSCRYPT_R=%d -DSCRYPT_P=%d -DSCRYPT_TMTO=%d", vliw, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto); + sprintf (build_opts, "%s -DSCRYPT_N=%d -DSCRYPT_R=%d -DSCRYPT_P=%d -DSCRYPT_TMTO=%d", build_opts, data.salts_buf[0].scrypt_N, data.salts_buf[0].scrypt_r, data.salts_buf[0].scrypt_p, data.salts_buf[0].scrypt_tmto); } - #else - - build_opts = (char *) mymalloc (256); - - sprintf (build_opts, "-I . 
-I amd/ -D VLIW%d -x clc++ -cl-std=CL1.2", vliw); - #endif clBuildProgram (device_param->program, 1, &device_param->device, build_opts, NULL, NULL); @@ -15228,18 +13426,8 @@ int main (int argc, char **argv) device_param->hooks_buf = hooks_buf; - switch (device_param->gpu_vector_width) - { - case 1: device_param->pw_transpose = pw_transpose_to_hi1; - device_param->pw_add = pw_add_to_hc1; - break; - case 2: device_param->pw_transpose = pw_transpose_to_hi2; - device_param->pw_add = pw_add_to_hc2; - break; - case 4: device_param->pw_transpose = pw_transpose_to_hi4; - device_param->pw_add = pw_add_to_hc4; - break; - } + device_param->pw_transpose = pw_transpose_to_hi1; + device_param->pw_add = pw_add_to_hc1; /** * kernel args @@ -15506,6 +13694,9 @@ int main (int argc, char **argv) * Store initial fanspeed if gpu_temp_retain is enabled */ + int gpu_temp_retain_set = 0; + + /* if (gpu_temp_disable == 0) { if (gpu_temp_retain != 0) @@ -15558,11 +13749,13 @@ int main (int argc, char **argv) hc_thread_mutex_unlock (mux_adl); } } + */ /** * Store original powercontrol/clocks settings, set overdrive 6 performance tuning settings */ + /* if (powertune_enable == 1) { hc_thread_mutex_lock (mux_adl); @@ -15674,68 +13867,11 @@ int main (int argc, char **argv) hc_thread_mutex_unlock (mux_adl); } + */ } - /* Temporary fix: - * with AMD r9 295x cards it seems that we need to set the powertune value just AFTER the ocl init stuff - * otherwise after hc_clCreateContext () etc, powertune value was set back to "normal" and cards unfortunately - * were not working @ full speed (setting hc_ADL_Overdrive_PowerControl_Set () here seems to fix the problem) - * Driver / ADL bug? 
- */ - - if (powertune_enable == 1) - { - hc_thread_mutex_lock (mux_adl); - - for (uint i = 0; i < devices_cnt; i++) - { - if (data.hm_device[i].od_version == 6) - { - // set powertune value only - - int powertune_supported = 0; - - int ADL_rc = 0; - - if ((ADL_rc = hc_ADL_Overdrive6_PowerControl_Caps (data.hm_dll, data.hm_device[i].adapter_index, &powertune_supported)) != ADL_OK) - { - log_error ("ERROR: Failed to get ADL PowerControl Capabilities"); - - return (-1); - } - - if (powertune_supported != 0) - { - // powertune set - ADLOD6PowerControlInfo powertune = {0, 0, 0, 0, 0}; - - if ((ADL_rc = hc_ADL_Overdrive_PowerControlInfo_Get (data.hm_dll, data.hm_device[i].adapter_index, &powertune)) != ADL_OK) - { - log_error ("ERROR: Failed to get current ADL PowerControl settings"); - - return (-1); - } - - if ((ADL_rc = hc_ADL_Overdrive_PowerControl_Set (data.hm_dll, data.hm_device[i].adapter_index, powertune.iMaxValue)) != ADL_OK) - { - log_error ("ERROR: Failed to set new ADL PowerControl values"); - - return (-1); - } - } - } - } - - hc_thread_mutex_unlock (mux_adl); - } - #endif - data.gpu_blocks_all = gpu_blocks_all; - #ifdef _OCL - if (gpu_async == 0) gpu_async = 1; // get rid of the warning - #endif - if (data.quiet == 0) log_info (""); /** @@ -16829,21 +14965,12 @@ int main (int argc, char **argv) device_param->kernel_params_mp_buf32[7] = 0; } - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyHtoD (device_param->d_root_css_buf, root_css_buf, device_param->size_root_css); - hc_cuMemcpyHtoD (device_param->d_markov_css_buf, markov_css_buf, device_param->size_markov_css); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL for (uint i = 0; i < 3; i++) hc_clSetKernelArg (device_param->kernel_mp, i, sizeof (cl_mem), (void *) device_param->kernel_params_mp[i]); for (uint i = 3; i < 4; i++) hc_clSetKernelArg (device_param->kernel_mp, i, sizeof (cl_ulong), (void *) device_param->kernel_params_mp[i]); for (uint i = 4; i < 8; i++) 
hc_clSetKernelArg (device_param->kernel_mp, i, sizeof (cl_uint), (void *) device_param->kernel_params_mp[i]); hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_root_css_buf, CL_TRUE, 0, device_param->size_root_css, root_css_buf, 0, NULL, NULL); hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, markov_css_buf, 0, NULL, NULL); - #endif } } else if (attack_mode == ATTACK_MODE_BF) @@ -17347,14 +15474,6 @@ int main (int argc, char **argv) device_param->kernel_params_mp_r_buf32[6] = 0; device_param->kernel_params_mp_r_buf32[7] = 0; - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - hc_cuMemcpyHtoD (device_param->d_root_css_buf, root_css_buf, device_param->size_root_css); - hc_cuMemcpyHtoD (device_param->d_markov_css_buf, markov_css_buf, device_param->size_markov_css); - - hc_cuCtxPopCurrent (&device_param->context); - #elif _OCL for (uint i = 0; i < 3; i++) hc_clSetKernelArg (device_param->kernel_mp_l, i, sizeof (cl_mem), (void *) device_param->kernel_params_mp_l[i]); for (uint i = 3; i < 4; i++) hc_clSetKernelArg (device_param->kernel_mp_l, i, sizeof (cl_ulong), (void *) device_param->kernel_params_mp_l[i]); for (uint i = 4; i < 9; i++) hc_clSetKernelArg (device_param->kernel_mp_l, i, sizeof (cl_uint), (void *) device_param->kernel_params_mp_l[i]); @@ -17365,7 +15484,6 @@ int main (int argc, char **argv) hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_root_css_buf, CL_TRUE, 0, device_param->size_root_css, root_css_buf, 0, NULL, NULL); hc_clEnqueueWriteBuffer (device_param->command_queue, device_param->d_markov_css_buf, CL_TRUE, 0, device_param->size_markov_css, markov_css_buf, 0, NULL, NULL); - #endif } } @@ -17778,43 +15896,6 @@ int main (int argc, char **argv) local_free (device_param->hooks_buf); - #ifdef _CUDA - hc_cuCtxPushCurrent (device_param->context); - - if (device_param->pws_buf) myfree (device_param->pws_buf); - if 
(device_param->d_pws_buf) hc_cuMemFree (device_param->d_pws_buf); - if (device_param->d_pws_amp_buf) hc_cuMemFree (device_param->d_pws_amp_buf); - if (device_param->d_rules) hc_cuMemFree (device_param->d_rules); - if (device_param->d_combs) hc_cuMemFree (device_param->d_combs); - if (device_param->d_bfs) hc_cuMemFree (device_param->d_bfs); - if (device_param->d_bitmap_s1_a) hc_cuMemFree (device_param->d_bitmap_s1_a); - if (device_param->d_bitmap_s1_b) hc_cuMemFree (device_param->d_bitmap_s1_b); - if (device_param->d_bitmap_s1_c) hc_cuMemFree (device_param->d_bitmap_s1_c); - if (device_param->d_bitmap_s1_d) hc_cuMemFree (device_param->d_bitmap_s1_d); - if (device_param->d_bitmap_s2_a) hc_cuMemFree (device_param->d_bitmap_s2_a); - if (device_param->d_bitmap_s2_b) hc_cuMemFree (device_param->d_bitmap_s2_b); - if (device_param->d_bitmap_s2_c) hc_cuMemFree (device_param->d_bitmap_s2_c); - if (device_param->d_bitmap_s2_d) hc_cuMemFree (device_param->d_bitmap_s2_d); - if (device_param->d_plain_bufs) hc_cuMemFree (device_param->d_plain_bufs); - if (device_param->d_digests_buf) hc_cuMemFree (device_param->d_digests_buf); - if (device_param->d_digests_shown) hc_cuMemFree (device_param->d_digests_shown); - if (device_param->d_salt_bufs) hc_cuMemFree (device_param->d_salt_bufs); - if (device_param->d_esalt_bufs) hc_cuMemFree (device_param->d_esalt_bufs); - if (device_param->d_tmps) hc_cuMemFree (device_param->d_tmps); - if (device_param->d_hooks) hc_cuMemFree (device_param->d_hooks); - if (device_param->d_result) hc_cuMemFree (device_param->d_result); - if (device_param->d_scryptV_buf) hc_cuMemFree (device_param->d_scryptV_buf); - if (device_param->d_root_css_buf) hc_cuMemFree (device_param->d_root_css_buf); - if (device_param->d_markov_css_buf) hc_cuMemFree (device_param->d_markov_css_buf); - - if (device_param->stream) hc_cuStreamDestroy (device_param->stream); - if (device_param->module) hc_cuModuleUnload (device_param->module); - - hc_cuCtxPopCurrent 
(&device_param->context); - - if (device_param->context) hc_cuCtxDestroy (device_param->context); - - #elif _OCL local_free (device_param->device_name); local_free (device_param->device_version); @@ -17863,14 +15944,11 @@ int main (int argc, char **argv) if (device_param->program_mp) hc_clReleaseProgram (device_param->program_mp); if (device_param->command_queue) hc_clReleaseCommandQueue (device_param->command_queue); if (device_param->context) hc_clReleaseContext (device_param->context); - #endif } - #ifdef _OCL - #ifndef OSX - // reset default fan speed +/* if (gpu_temp_disable == 0) { if (gpu_temp_retain != 0) @@ -17953,29 +16031,27 @@ int main (int argc, char **argv) hc_thread_mutex_unlock (mux_adl); } - #endif - #endif - if (gpu_temp_disable == 0) { - #ifdef _CUDA - #ifdef LINUX - hc_NVML_nvmlShutdown (); - #endif + if (vendor_id == VENDOR_ID_NV) + { + #ifdef LINUX + hc_NVML_nvmlShutdown (); + #endif - #ifdef WIN - NvAPI_Unload (); - #endif - #endif + #ifdef WIN + NvAPI_Unload (); + #endif + } - #ifdef _OCL - #ifndef OSX - hc_ADL_Main_Control_Destroy (data.hm_dll); + if (vendor_id == VENDOR_ID_AMD) + { + hc_ADL_Main_Control_Destroy (data.hm_dll); - hm_close (data.hm_dll); - #endif - #endif + hm_close (data.hm_dll); + } } +*/ // free memory @@ -18014,13 +16090,9 @@ int main (int argc, char **argv) local_free (bitmap_s2_c); local_free (bitmap_s2_d); - #ifdef _OCL - #ifndef OSX local_free (temp_retain_fanspeed_value); local_free (od_clock_mem_status); local_free (od_power_control_status); - #endif - #endif global_free (devices_param); diff --git a/src/shared.c b/src/shared.c index c724f95..ce440cf 100644 --- a/src/shared.c +++ b/src/shared.c @@ -13,13 +13,8 @@ * tuning tools */ -#ifdef _CUDA -#define GET_ACCEL(x) GPU_ACCEL_NV_ ## x -#define GET_LOOPS(x) GPU_LOOPS_NV_ ## x -#elif _OCL #define GET_ACCEL(x) GPU_ACCEL_AMD_ ## x #define GET_LOOPS(x) GPU_LOOPS_AMD_ ## x -#endif /** * bit rotate @@ -2638,7 +2633,7 @@ void fsync (int fd) * thermal */ -#ifdef _CUDA +/* 
#ifdef _WIN int hm_get_adapter_index (HM_ADAPTER nvGPUHandle[DEVICES_MAX]) { @@ -2664,7 +2659,7 @@ int hm_get_adapter_index (HM_ADAPTER nvGPUHandle[DEVICES_MAX]) for (uint i = 0; i < DEVICES_MAX; i++) { - /* do not use wrapper function to omit warning message */ + // do not use wrapper function to omit warning message if (nvmlDeviceGetHandleByIndex (i, &nvGPUHandle[i]) != NVML_SUCCESS) break; //can be used to determine if the device by index matches the cuda device by index @@ -2684,10 +2679,7 @@ int hm_get_adapter_index (HM_ADAPTER nvGPUHandle[DEVICES_MAX]) return (pGpuCount); } #endif -#endif -#ifdef _OCL -#ifndef OSX void hm_close (HM_LIB hm_dll) { #ifdef _POSIX @@ -2774,9 +2766,9 @@ LPAdapterInfo hm_get_adapter_info (HM_LIB hm_dll, int iNumberAdapters) return lpAdapterInfo; } -/* - * does not help at all, since AMD does not assign different bus id, device id when we have multi GPU setups - * +// +// does not help at all, since AMD does not assign different bus id, device id when we have multi GPU setups +// int hm_get_opencl_device_index (hm_attrs_t *hm_device, uint num_adl_adapters, int bus_num, int dev_num) { @@ -2812,7 +2804,6 @@ void hm_get_opencl_busid_devid (hm_attrs_t *hm_device, uint opencl_num_devices, hm_device[i].devid = device_topology.pcie.device; } } -*/ void hm_sort_adl_adapters_by_busid_devid (uint32_t *valid_adl_device_list, int num_adl_adapters, LPAdapterInfo lpAdapterInfo) { @@ -3050,59 +3041,57 @@ int hm_get_adapter_index (hm_attrs_t *hm_device, uint32_t *valid_adl_device_list return num_adl_adapters; } -#endif -#endif int hm_get_temperature_with_device_id (const uint device_id) { - #ifdef _OCL - #ifndef OSX - if (data.hm_dll) + if (data.vendor_id == VENDOR_ID_AMD) { - if (data.hm_device[device_id].od_version == 5) + if (data.hm_dll) { - ADLTemperature Temperature; + if (data.hm_device[device_id].od_version == 5) + { + ADLTemperature Temperature; - Temperature.iSize = sizeof (ADLTemperature); + Temperature.iSize = sizeof (ADLTemperature); - 
if (hc_ADL_Overdrive5_Temperature_Get (data.hm_dll, data.hm_device[device_id].adapter_index, 0, &Temperature) != ADL_OK) return -1; + if (hc_ADL_Overdrive5_Temperature_Get (data.hm_dll, data.hm_device[device_id].adapter_index, 0, &Temperature) != ADL_OK) return -1; - return Temperature.iTemperature / 1000; - } - else if (data.hm_device[device_id].od_version == 6) - { - int Temperature = 0; + return Temperature.iTemperature / 1000; + } + else if (data.hm_device[device_id].od_version == 6) + { + int Temperature = 0; - if (hc_ADL_Overdrive6_Temperature_Get (data.hm_dll, data.hm_device[device_id].adapter_index, &Temperature) != ADL_OK) return -1; + if (hc_ADL_Overdrive6_Temperature_Get (data.hm_dll, data.hm_device[device_id].adapter_index, &Temperature) != ADL_OK) return -1; - return Temperature / 1000; + return Temperature / 1000; + } } } - #endif - #endif - #ifdef _CUDA - #ifdef LINUX - int temperature = 0; + if (data.vendor_id == VENDOR_ID_NV) + { + #ifdef LINUX + int temperature = 0; - hc_NVML_nvmlDeviceGetTemperature (data.hm_device[device_id].adapter_index, NVML_TEMPERATURE_GPU, (unsigned int *) &temperature); + hc_NVML_nvmlDeviceGetTemperature (data.hm_device[device_id].adapter_index, NVML_TEMPERATURE_GPU, (unsigned int *) &temperature); - return temperature; - #endif + return temperature; + #endif - #ifdef WIN - NV_GPU_THERMAL_SETTINGS pThermalSettings; + #ifdef WIN + NV_GPU_THERMAL_SETTINGS pThermalSettings; - pThermalSettings.version = NV_GPU_THERMAL_SETTINGS_VER; - pThermalSettings.count = NVAPI_MAX_THERMAL_SENSORS_PER_GPU; - pThermalSettings.sensor[0].controller = NVAPI_THERMAL_CONTROLLER_UNKNOWN; - pThermalSettings.sensor[0].target = NVAPI_THERMAL_TARGET_GPU; + pThermalSettings.version = NV_GPU_THERMAL_SETTINGS_VER; + pThermalSettings.count = NVAPI_MAX_THERMAL_SENSORS_PER_GPU; + pThermalSettings.sensor[0].controller = NVAPI_THERMAL_CONTROLLER_UNKNOWN; + pThermalSettings.sensor[0].target = NVAPI_THERMAL_TARGET_GPU; - if (hc_NvAPI_GPU_GetThermalSettings 
(data.hm_device[device_id].adapter_index, 0, &pThermalSettings) != NVAPI_OK) return -1; + if (hc_NvAPI_GPU_GetThermalSettings (data.hm_device[device_id].adapter_index, 0, &pThermalSettings) != NVAPI_OK) return -1; - return pThermalSettings.sensor[0].currentTemp; - #endif - #endif + return pThermalSettings.sensor[0].currentTemp; + #endif + } return -1; } @@ -3111,55 +3100,55 @@ int hm_get_fanspeed_with_device_id (const uint device_id) { if (data.hm_device[device_id].fan_supported == 1) { - #ifdef _OCL - #ifndef OSX - if (data.hm_dll) + if (data.vendor_id == VENDOR_ID_AMD) { - if (data.hm_device[device_id].od_version == 5) + if (data.hm_dll) { - ADLFanSpeedValue lpFanSpeedValue; + if (data.hm_device[device_id].od_version == 5) + { + ADLFanSpeedValue lpFanSpeedValue; - memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue)); + memset (&lpFanSpeedValue, 0, sizeof (lpFanSpeedValue)); - lpFanSpeedValue.iSize = sizeof (lpFanSpeedValue); - lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT; - lpFanSpeedValue.iFlags = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED; + lpFanSpeedValue.iSize = sizeof (lpFanSpeedValue); + lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT; + lpFanSpeedValue.iFlags = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED; - if (hc_ADL_Overdrive5_FanSpeed_Get (data.hm_dll, data.hm_device[device_id].adapter_index, 0, &lpFanSpeedValue) != ADL_OK) return -1; + if (hc_ADL_Overdrive5_FanSpeed_Get (data.hm_dll, data.hm_device[device_id].adapter_index, 0, &lpFanSpeedValue) != ADL_OK) return -1; - return lpFanSpeedValue.iFanSpeed; - } - else // od_version == 6 - { - ADLOD6FanSpeedInfo faninfo; + return lpFanSpeedValue.iFanSpeed; + } + else // od_version == 6 + { + ADLOD6FanSpeedInfo faninfo; - memset (&faninfo, 0, sizeof (faninfo)); + memset (&faninfo, 0, sizeof (faninfo)); - if (hc_ADL_Overdrive6_FanSpeed_Get (data.hm_dll, data.hm_device[device_id].adapter_index, &faninfo) != ADL_OK) return -1; + if (hc_ADL_Overdrive6_FanSpeed_Get (data.hm_dll, 
data.hm_device[device_id].adapter_index, &faninfo) != ADL_OK) return -1; - return faninfo.iFanSpeedPercent; + return faninfo.iFanSpeedPercent; + } } } - #endif - #endif - #ifdef _CUDA - #ifdef LINUX - int speed = 0; + if (data.vendor_id == VENDOR_ID_NV) + { + #ifdef LINUX + int speed = 0; - hc_NVML_nvmlDeviceGetFanSpeed (data.hm_device[device_id].adapter_index, (unsigned int *) &speed); + hc_NVML_nvmlDeviceGetFanSpeed (data.hm_device[device_id].adapter_index, (unsigned int *) &speed); - return speed; - #endif + return speed; + #endif - #ifdef WIN - NvU32 speed = 0; + #ifdef WIN + NvU32 speed = 0; - hc_NvAPI_GPU_GetTachReading (data.hm_device[device_id].adapter_index, &speed); + hc_NvAPI_GPU_GetTachReading (data.hm_device[device_id].adapter_index, &speed); - return speed; - #endif - #endif + return speed; + #endif + } } return -1; @@ -3167,46 +3156,44 @@ int hm_get_fanspeed_with_device_id (const uint device_id) int hm_get_utilization_with_device_id (const uint device_id) { - #ifdef _OCL - #ifndef OSX - if (data.hm_dll) + if (data.vendor_id == VENDOR_ID_AMD) { - ADLPMActivity PMActivity; + if (data.hm_dll) + { + ADLPMActivity PMActivity; - PMActivity.iSize = sizeof (ADLPMActivity); + PMActivity.iSize = sizeof (ADLPMActivity); - if (hc_ADL_Overdrive_CurrentActivity_Get (data.hm_dll, data.hm_device[device_id].adapter_index, &PMActivity) != ADL_OK) return -1; + if (hc_ADL_Overdrive_CurrentActivity_Get (data.hm_dll, data.hm_device[device_id].adapter_index, &PMActivity) != ADL_OK) return -1; - return PMActivity.iActivityPercent; + return PMActivity.iActivityPercent; + } } - #endif - #endif - #ifdef _CUDA - #ifdef LINUX - nvmlUtilization_t utilization; + if (data.vendor_id == VENDOR_ID_AMD) + { + #ifdef LINUX + nvmlUtilization_t utilization; - hc_NVML_nvmlDeviceGetUtilizationRates (data.hm_device[device_id].adapter_index, &utilization); + hc_NVML_nvmlDeviceGetUtilizationRates (data.hm_device[device_id].adapter_index, &utilization); - return utilization.gpu; - #endif + 
return utilization.gpu; + #endif - #ifdef WIN - NV_GPU_DYNAMIC_PSTATES_INFO_EX pDynamicPstatesInfoEx; + #ifdef WIN + NV_GPU_DYNAMIC_PSTATES_INFO_EX pDynamicPstatesInfoEx; - pDynamicPstatesInfoEx.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; + pDynamicPstatesInfoEx.version = NV_GPU_DYNAMIC_PSTATES_INFO_EX_VER; - if (hc_NvAPI_GPU_GetDynamicPstatesInfoEx (data.hm_device[device_id].adapter_index, &pDynamicPstatesInfoEx) != NVAPI_OK) return -1; + if (hc_NvAPI_GPU_GetDynamicPstatesInfoEx (data.hm_device[device_id].adapter_index, &pDynamicPstatesInfoEx) != NVAPI_OK) return -1; - return pDynamicPstatesInfoEx.utilization[0].percentage; - #endif - #endif + return pDynamicPstatesInfoEx.utilization[0].percentage; + #endif + } return -1; } -#ifdef _OCL -#ifndef OSX int hm_set_fanspeed_with_device_id (const uint device_id, const int fanspeed) { if (data.hm_device[device_id].fan_supported == 1) @@ -3246,8 +3233,7 @@ int hm_set_fanspeed_with_device_id (const uint device_id, const int fanspeed) return -1; } -#endif -#endif +*/ /** * maskprocessor @@ -5368,7 +5354,6 @@ char *stroptitype (const uint opti_type) case OPTI_TYPE_SINGLE_HASH: return ((char *) OPTI_STR_SINGLE_HASH); break; case OPTI_TYPE_SINGLE_SALT: return ((char *) OPTI_STR_SINGLE_SALT); break; case OPTI_TYPE_BRUTE_FORCE: return ((char *) OPTI_STR_BRUTE_FORCE); break; - case OPTI_TYPE_SCALAR_MODE: return ((char *) OPTI_STR_SCALAR_MODE); break; case OPTI_TYPE_RAW_HASH: return ((char *) OPTI_STR_RAW_HASH); break; } @@ -8438,76 +8423,6 @@ void myquit () data.devices_status = STATUS_QUIT; } -#ifdef _OCL -uint get_vliw_by_device_name (const char *device_name) -{ - uint vliw = 0; - - if (strcmp (device_name, "Capeverde" ) == 0) vliw = 1; - if (strcmp (device_name, "Pitcairn" ) == 0) vliw = 1; - if (strcmp (device_name, "Tahiti" ) == 0) vliw = 1; - if (strcmp (device_name, "ATI RV710" ) == 0) vliw = 1; - if (strcmp (device_name, "ATI RV730" ) == 0) vliw = 1; - if (strcmp (device_name, "ATI RV770" ) == 0) vliw = 4; - if (strcmp 
(device_name, "Cayman" ) == 0) vliw = 4; - if (strcmp (device_name, "Devastator" ) == 0) vliw = 4; - if (strcmp (device_name, "Scrapper" ) == 0) vliw = 4; - if (strcmp (device_name, "Barts" ) == 0) vliw = 5; - if (strcmp (device_name, "BeaverCreek" ) == 0) vliw = 5; - if (strcmp (device_name, "Caicos" ) == 0) vliw = 5; - if (strcmp (device_name, "Cedar" ) == 0) vliw = 5; - if (strcmp (device_name, "Cypress" ) == 0) vliw = 5; - if (strcmp (device_name, "Juniper" ) == 0) vliw = 5; - if (strcmp (device_name, "Loveland" ) == 0) vliw = 5; - if (strcmp (device_name, "Redwood" ) == 0) vliw = 5; - if (strcmp (device_name, "Turks" ) == 0) vliw = 5; - if (strcmp (device_name, "WinterPark" ) == 0) vliw = 5; - if (strcmp (device_name, "Oland" ) == 0) vliw = 1; - if (strcmp (device_name, "Cats" ) == 0) vliw = 1; - if (strcmp (device_name, "Raccoons" ) == 0) vliw = 1; - if (strcmp (device_name, "Bonaire" ) == 0) vliw = 1; - if (strcmp (device_name, "Hawaii" ) == 0) vliw = 1; - if (strcmp (device_name, "Spectre" ) == 0) vliw = 1; - if (strcmp (device_name, "Spooky" ) == 0) vliw = 1; - if (strcmp (device_name, "Kalindi" ) == 0) vliw = 1; - if (strcmp (device_name, "Hainan" ) == 0) vliw = 1; - if (strcmp (device_name, "Iceland" ) == 0) vliw = 1; - if (strcmp (device_name, "Tonga" ) == 0) vliw = 1; - if (strcmp (device_name, "Mullins" ) == 0) vliw = 1; - if (strcmp (device_name, "Fiji" ) == 0) vliw = 1; - - if (strncmp (device_name, "ATI Radeon HD 4", 15) == 0) vliw = 1; - if (strncmp (device_name, "ATI Radeon HD 5", 15) == 0) vliw = 5; - if (strncmp (device_name, "ATI Radeon HD 6", 15) == 0) vliw = 4; - if (strncmp (device_name, "ATI Radeon HD 7", 15) == 0) vliw = 4; - if (strncmp (device_name, "ATI Radeon HD 79", 16) == 0) vliw = 1; - if (strncmp (device_name, "ATI Radeon HD 8", 15) == 0) vliw = 1; - if (strncmp (device_name, "AMD Radeon R9", 13) == 0) vliw = 1; - - return vliw; -} -#else -uint get_vliw_by_compute_capability (const uint major, const uint minor) -{ - uint vliw = 0; 
- - if (major == 1 && minor == 0) vliw = 1; - if (major == 1 && minor == 1) vliw = 1; - if (major == 1 && minor == 2) vliw = 1; - if (major == 1 && minor == 3) vliw = 1; - if (major == 2 && minor == 0) vliw = 1; - if (major == 2 && minor == 1) vliw = 2; - if (major == 3 && minor == 0) vliw = 2; - if (major == 3 && minor == 5) vliw = 2; - if (major == 3 && minor == 7) vliw = 2; - if (major == 5 && minor == 0) vliw = 2; - if (major == 5 && minor == 2) vliw = 2; - - return vliw; -} -#endif - -#ifdef _OCL void load_kernel (const char *kernel_file, int num_devices, size_t *kernel_lengths, const unsigned char **kernel_sources) { FILE *fp; @@ -8561,7 +8476,6 @@ void writeProgramBin (char *dst, unsigned char *binary, size_t binary_size) fflush (fp); fclose (fp); } -#endif /** * restore -- 2.43.0